Hello
Two days ago I discovered the hash method and I see great potential in it, but now I don't have time to learn and I need a solution. Could some good guy optimize the following code (create B table and especially macro DataSetCreate) for the hash table.
I have an intuition that it can be included in one quick procedure.
Thank you in advance for your help
data A;
informat datetime datetime21.;
input datetime Nr ;
format datetime datetime21.;
cards;
01NOV2012:20:00:00 112113
19DEC2013:00:00:00 112113
29OCT2014:23:00:00 112113
07DEC2017:23:00:00 112113
28JAN2004:23:00:00 112114
28MAR2006:23:00:00 112114
19OCT2011:00:00:00 112114
16DEC2013:19:00:00 112114
23SEP2014:19:00:00 112114
11FEB2019:19:00:00 112114
11MAR2008:17:00:00 112115
15MAY2009:20:00:00 112115
10MAR2011:05:00:00 112115
06FEB2012:04:00:00 112115
27MAR2012:14:00:00 112115
03MAR2015:21:00:00 112115
20JAN2016:13:00:00 112115
05AUG2016:22:00:00 112115
01FEB2019:20:00:00 112115
21AUG2018:23:00:00 112122
26FEB2013:00:00:00 112123
29APR2014:17:00:00 112123
12MAY2016:22:00:00 112123
19OCT2018:21:00:00 112123
15MAY2003:09:00:00 112124
19NOV2003:05:00:00 112124
05JUN2009:12:00:00 112124
02SEP2016:01:00:00 112124
06JUN2006:15:00:00 112125
15JAN2016:21:00:00 112125
08FEB2017:08:00:00 112125
18MAY2018:22:00:00 112125
09MAY2019:19:00:00 112125
05APR2004:17:00:00 112132
09JAN2009:04:00:00 112132
17OCT2012:04:00:00 112132
26JUL2018:23:00:00 112132
02AUG2018:19:00:00 112145
12AUG2016:19:00:00 112213
25FEB2014:22:00:00 112214
08AUG2019:23:00:00 112214
02DEC2013:13:00:00 112215
30APR2014:05:00:00 112215
18SEP2014:00:00:00 112215
05MAY2016:13:00:00 112215
24FEB2017:21:00:00 112215
09JUN2017:02:00:00 112215
07MAR2019:18:00:00 112215
04NOV2019:20:00:00 112215
25MAR2009:19:00:00 112223
09FEB2010:21:00:00 112223
24MAY2011:00:00:00 112223
09NOV2011:05:00:00 112223
13APR2012:07:00:00 112223
05MAY2015:22:00:00 112223
15AUG2016:05:00:00 112223
run;
data B;
set A;
output;
do i = 1 to 59;
datetime = INTNX('second',datetime, 60);
output;
end;
drop i;
run;
data B;
set B ;
temp = rand("Uniform") * 10;
drop Nr;
run;
%macro DataSetCreate();
%let dsid = %sysfunc(open(A));
%let obs = %sysfunc(attrn(&dsid,nlobs));
%let rc = %sysfunc(close(&dsid));
%do i = 1 %to &obs;
data _null_;
set A;
if _n_ = &i then do;
call symput("datetime" , datetime);
call symput("Nr" , Nr);
end;
run;
%let Nr = &Nr;
data _null_;
set B;
if datetime = &datetime then do;
call symput("nrdb" , _n_);
end;
run;
data B_&i._&Nr;
set B;
if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
datetime = INTNX('second',datetime,- 60);
run;
%end;
%mend DataSetCreate;
%DataSetCreate;
Please give a description of what you want the B table to look like. And what you want the macro result to be?
data B_112113;
informat datetime datetime21.;
input datetime temperature temperature_1 temperature_2 temperature_3 temperature_4 temperature_5 temperature_6 temperature_7 temperature_8 temperature_9 temperature_10 ;
format datetime datetime21.;
cards;
01NOV2012:20:09:00 5.3236033372 8.576786397 5.4113848088 9.7418741253 3.5913227149 5.6276161131 5.2907386329 7.9579330469 8.6438692524 8.9479117864 5.3236033372
19DEC2013:00:09:00 0.0988801033 1.9708661479 0.923279433 8.9369205316 6.3919116347 9.3058412173 6.9498889288 5.8367105178 5.4225223232 1.6780249448 0.0988801033
29OCT2014:23:09:00 1.0845115222 5.6013268162 7.8389588278 7.2490334092 4.9445350212 1.068029434 0.6934379367 8.5052750329 8.8292096113 2.0018741512 1.0845115222
07DEC2017:23:09:00 3.6536939186 0.6068207975 0.4270497942 5.0366269541 5.1472254982 1.0142804869 7.4558170699 0.2896813233 6.1450612033 8.0979537475 3.6536939186
run;
Let's try it another way:
B seems to be your main dataset, and A is used to specify a subset. Is that correct? And you want to apply a value from A to observations within a certain range of the datetime of A?
@Kurt_Bremser wrote:
Let's try it another way:
B seems to be your main dataset, and A is used to specify a subset. Is that correct? And you want to apply a value from A to observations within a certain range of the datetime of A?
And you want to apply a value from A to observations within a certain range of the datetime of --> B <--?
Try this:
data A;
informat datetime datetime21.;
input datetime Nr ;
format datetime datetime21.;
cards;
01NOV2012:20:00:00 112113
19DEC2013:00:00:00 112113
29OCT2014:23:00:00 112113
07DEC2017:23:00:00 112113
28JAN2004:23:00:00 112114
28MAR2006:23:00:00 112114
19OCT2011:00:00:00 112114
16DEC2013:19:00:00 112114
23SEP2014:19:00:00 112114
11FEB2019:19:00:00 112114
11MAR2008:17:00:00 112115
15MAY2009:20:00:00 112115
10MAR2011:05:00:00 112115
06FEB2012:04:00:00 112115
27MAR2012:14:00:00 112115
03MAR2015:21:00:00 112115
20JAN2016:13:00:00 112115
05AUG2016:22:00:00 112115
01FEB2019:20:00:00 112115
21AUG2018:23:00:00 112122
26FEB2013:00:00:00 112123
29APR2014:17:00:00 112123
12MAY2016:22:00:00 112123
19OCT2018:21:00:00 112123
15MAY2003:09:00:00 112124
19NOV2003:05:00:00 112124
05JUN2009:12:00:00 112124
02SEP2016:01:00:00 112124
06JUN2006:15:00:00 112125
15JAN2016:21:00:00 112125
08FEB2017:08:00:00 112125
18MAY2018:22:00:00 112125
09MAY2019:19:00:00 112125
05APR2004:17:00:00 112132
09JAN2009:04:00:00 112132
17OCT2012:04:00:00 112132
26JUL2018:23:00:00 112132
02AUG2018:19:00:00 112145
12AUG2016:19:00:00 112213
25FEB2014:22:00:00 112214
08AUG2019:23:00:00 112214
02DEC2013:13:00:00 112215
30APR2014:05:00:00 112215
18SEP2014:00:00:00 112215
05MAY2016:13:00:00 112215
24FEB2017:21:00:00 112215
09JUN2017:02:00:00 112215
07MAR2019:18:00:00 112215
04NOV2019:20:00:00 112215
25MAR2009:19:00:00 112223
09FEB2010:21:00:00 112223
24MAY2011:00:00:00 112223
09NOV2011:05:00:00 112223
13APR2012:07:00:00 112223
05MAY2015:22:00:00 112223
15AUG2016:05:00:00 112223
;
data B;
set A;
call streaminit(1);
start = datetime;
do i = 0 to 59;
datetime = intnx('second',start,i * 60);
temperature = rand("Uniform") * 10;
output;
end;
drop i nr start;
run;
data want;
set b;
if _n_ = 1
then do;
length nr 8;
declare hash a (dataset:"a");
a.definekey('datetime');
a.definedata('nr');
a.definedone();
nr = .;
end;
do datetime = datetime - 600 to datetime by 60;
if a.find() = 0 then output;
end;
run;
Depending on the granularity of your datetimes, you may have to change the DO loop. If you have fractions of seconds, I recommend to remove those first.
Works, and at the beginning it is ok, but please make three changes.
1. first, it's a _n_ search, not a datetime search.
In the original set B, a few observations may be missing (no temperature measurement) and I want the subsets of Table B to be equal.
in my code:
set A;
if _n_ = &i then do;
call symput("datetime" , datetime);
call symput("Nr" , Nr);
end;
run;
data _null_;
set B;
if datetime = &datetime then do;
call symput("nrdb" , _n_);
end;
run;
data B_temp;
set B;
if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
datetime = INTNX('second',datetime,- 60);
run;
2. secondly, it is transposed every 10 observations (depends on the sampling range). in the garden set B there is still a variable pressure.
data B_temp;
do i = 1 to 10;
set B_temp;
array temperature_ temperature_1 - temperature_10;
temperature_[i] = temperature;
end;
drop i;
run;
3. third, dividing table B into separate tables depending on the variable "Nr"
Well, I have something but maybe it can be easier
data B;
set B;
nrdb = _n_;
run;
data aa;
set a;
if _n_ = 1
then do;
declare hash b (dataset:"b");
b.definekey('datetime');
b.definedata('nrdb');
b.definedone();
end;
if b.find() = 0 then nrdb = nrdb;
run;
data want;
set b;
if _n_ = 1
then do;
length nr 8;
declare hash aa (dataset:"aa");
aa.definekey('nrdb');
aa.definedata('nr');
aa.definedone();
nr = .;
end;
do nrdb = nrdb - 9 to nrdb by 1;
if aa.find() = 0 then output;
end;
run;
Update codes. Only macro changes.
data A;
informat datetime datetime21.;
input datetime Nr ;
format datetime datetime21.;
cards;
01NOV2012:20:00:00 112113
19DEC2013:00:00:00 112113
29OCT2014:23:00:00 112113
07DEC2017:23:00:00 112113
28JAN2004:23:00:00 112114
28MAR2006:23:00:00 112114
19OCT2011:00:00:00 112114
16DEC2013:19:00:00 112114
23SEP2014:19:00:00 112114
11FEB2019:19:00:00 112114
11MAR2008:17:00:00 112115
15MAY2009:20:00:00 112115
10MAR2011:05:00:00 112115
06FEB2012:04:00:00 112115
27MAR2012:14:00:00 112115
03MAR2015:21:00:00 112115
20JAN2016:13:00:00 112115
05AUG2016:22:00:00 112115
01FEB2019:20:00:00 112115
21AUG2018:23:00:00 112122
26FEB2013:00:00:00 112123
29APR2014:17:00:00 112123
12MAY2016:22:00:00 112123
19OCT2018:21:00:00 112123
15MAY2003:09:00:00 112124
19NOV2003:05:00:00 112124
05JUN2009:12:00:00 112124
02SEP2016:01:00:00 112124
06JUN2006:15:00:00 112125
15JAN2016:21:00:00 112125
08FEB2017:08:00:00 112125
18MAY2018:22:00:00 112125
09MAY2019:19:00:00 112125
05APR2004:17:00:00 112132
09JAN2009:04:00:00 112132
17OCT2012:04:00:00 112132
26JUL2018:23:00:00 112132
02AUG2018:19:00:00 112145
12AUG2016:19:00:00 112213
25FEB2014:22:00:00 112214
08AUG2019:23:00:00 112214
02DEC2013:13:00:00 112215
30APR2014:05:00:00 112215
18SEP2014:00:00:00 112215
05MAY2016:13:00:00 112215
24FEB2017:21:00:00 112215
09JUN2017:02:00:00 112215
07MAR2019:18:00:00 112215
04NOV2019:20:00:00 112215
25MAR2009:19:00:00 112223
09FEB2010:21:00:00 112223
24MAY2011:00:00:00 112223
09NOV2011:05:00:00 112223
13APR2012:07:00:00 112223
05MAY2015:22:00:00 112223
15AUG2016:05:00:00 112223
run;
data B;
set A;
output;
do i = 1 to 59;
datetime = INTNX('second',datetime, 60);
output;
end;
drop i;
run;
data B;
set B ;
temperature = rand("Uniform") * 10;
drop Nr;
run;
%macro DataSetCreate();
%let dsid = %sysfunc(open(A));
%let obs = %sysfunc(attrn(&dsid,nlobs));
%let rc = %sysfunc(close(&dsid));
%do i = 1 %to &obs;
data _null_;
set A;
if _n_ = &i then do;
call symput("datetime" , datetime);
call symput("Nr" , Nr);
end;
run;
%let Nr = &Nr;
data _null_;
set B;
if datetime = &datetime then do;
call symput("nrdb" , _n_);
end;
run;
%if %sysfunc(exist(B_&Nr)) %then %do;
data B_temp;
set B;
if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
datetime = INTNX('second',datetime,- 60);
run;
data B_temp;
do i = 1 to 10;
set B_temp;
array temperature_ temperature_1 - temperature_10;
temperature_[i] = temperature;
end;
drop i;
run;
data B_&Nr;
set B_&Nr B_temp;
run;
%end; %else %do;
data B_&Nr;
set B;
if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
datetime = INTNX('second',datetime,- 60);
run;
data B_&Nr;
do i = 1 to 10;
set B_&Nr;
array temperature_ temperature_1 - temperature_10;
temperature_[i] = temperature;
end;
drop i;
run;
%end;
%end;
%mend DataSetCreate;
%DataSetCreate;
Ok. Again. Please explain what the logic behind the macro is?
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.