Hello
Two days ago I discovered the hash method and I see great potential in it, but now I don't have time to learn and I need a solution. Could some good guy optimize the following code (create B table and especially macro DataSetCreate) for the hash table.
I have an intuition that it can be included in one quick procedure.
Thank you in advance for your help
data A;
informat datetime datetime21.;
input datetime Nr ;
format datetime datetime21.;
cards;
01NOV2012:20:00:00 112113
19DEC2013:00:00:00 112113
29OCT2014:23:00:00 112113
07DEC2017:23:00:00 112113
28JAN2004:23:00:00 112114
28MAR2006:23:00:00 112114
19OCT2011:00:00:00 112114
16DEC2013:19:00:00 112114
23SEP2014:19:00:00 112114
11FEB2019:19:00:00 112114
11MAR2008:17:00:00 112115
15MAY2009:20:00:00 112115
10MAR2011:05:00:00 112115
06FEB2012:04:00:00 112115
27MAR2012:14:00:00 112115
03MAR2015:21:00:00 112115
20JAN2016:13:00:00 112115
05AUG2016:22:00:00 112115
01FEB2019:20:00:00 112115
21AUG2018:23:00:00 112122
26FEB2013:00:00:00 112123
29APR2014:17:00:00 112123
12MAY2016:22:00:00 112123
19OCT2018:21:00:00 112123
15MAY2003:09:00:00 112124
19NOV2003:05:00:00 112124
05JUN2009:12:00:00 112124
02SEP2016:01:00:00 112124
06JUN2006:15:00:00 112125
15JAN2016:21:00:00 112125
08FEB2017:08:00:00 112125
18MAY2018:22:00:00 112125
09MAY2019:19:00:00 112125
05APR2004:17:00:00 112132
09JAN2009:04:00:00 112132
17OCT2012:04:00:00 112132
26JUL2018:23:00:00 112132
02AUG2018:19:00:00 112145
12AUG2016:19:00:00 112213
25FEB2014:22:00:00 112214
08AUG2019:23:00:00 112214
02DEC2013:13:00:00 112215
30APR2014:05:00:00 112215
18SEP2014:00:00:00 112215
05MAY2016:13:00:00 112215
24FEB2017:21:00:00 112215
09JUN2017:02:00:00 112215
07MAR2019:18:00:00 112215
04NOV2019:20:00:00 112215
25MAR2009:19:00:00 112223
09FEB2010:21:00:00 112223
24MAY2011:00:00:00 112223
09NOV2011:05:00:00 112223
13APR2012:07:00:00 112223
05MAY2015:22:00:00 112223
15AUG2016:05:00:00 112223
run;
data B;
set A;
output;
do i = 1 to 59;
datetime = INTNX('second',datetime, 60);
output;
end;
drop i;
run;
data B;
set B ;
temp = rand("Uniform") * 10;
drop Nr;
run;
%macro DataSetCreate();
%let dsid = %sysfunc(open(A));
%let obs = %sysfunc(attrn(&dsid,nlobs));
%let rc = %sysfunc(close(&dsid));
%do i = 1 %to &obs;
data _null_;
set A;
if _n_ = &i then do;
call symput("datetime" , datetime);
call symput("Nr" , Nr);
end;
run;
%let Nr = &Nr;
data _null_;
set B;
if datetime = &datetime then do;
call symput("nrdb" , _n_);
end;
run;
data B_&i._&Nr;
set B;
if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
datetime = INTNX('second',datetime,- 60);
run;
%end;
%mend DataSetCreate;
%DataSetCreate;
Please give a description of what you want the B table to look like. And what you want the macro result to be?
data B_112113;
informat datetime datetime21.;
input datetime temperature temperature_1 temperature_2 temperature_3 temperature_4 temperature_5 temperature_6 temperature_7 temperature_8 temperature_9 temperature_10 ;
format datetime datetime21.;
cards;
01NOV2012:20:09:00 5.3236033372 8.576786397 5.4113848088 9.7418741253 3.5913227149 5.6276161131 5.2907386329 7.9579330469 8.6438692524 8.9479117864 5.3236033372
19DEC2013:00:09:00 0.0988801033 1.9708661479 0.923279433 8.9369205316 6.3919116347 9.3058412173 6.9498889288 5.8367105178 5.4225223232 1.6780249448 0.0988801033
29OCT2014:23:09:00 1.0845115222 5.6013268162 7.8389588278 7.2490334092 4.9445350212 1.068029434 0.6934379367 8.5052750329 8.8292096113 2.0018741512 1.0845115222
07DEC2017:23:09:00 3.6536939186 0.6068207975 0.4270497942 5.0366269541 5.1472254982 1.0142804869 7.4558170699 0.2896813233 6.1450612033 8.0979537475 3.6536939186
run;
Let's try it another way:
B seems to be your main dataset, and A is used to specify a subset. Is that correct? And you want to apply a value from A to observations within a certain range of the datetime of A?
@Kurt_Bremser wrote:
Let's try it another way:
B seems to be your main dataset, and A is used to specify a subset. Is that correct? And you want to apply a value from A to observations within a certain range of the datetime of A?
And you want to apply a value from A to observations within a certain range of the datetime of --> B <--?
Try this:
data A;
informat datetime datetime21.;
input datetime Nr ;
format datetime datetime21.;
cards;
01NOV2012:20:00:00 112113
19DEC2013:00:00:00 112113
29OCT2014:23:00:00 112113
07DEC2017:23:00:00 112113
28JAN2004:23:00:00 112114
28MAR2006:23:00:00 112114
19OCT2011:00:00:00 112114
16DEC2013:19:00:00 112114
23SEP2014:19:00:00 112114
11FEB2019:19:00:00 112114
11MAR2008:17:00:00 112115
15MAY2009:20:00:00 112115
10MAR2011:05:00:00 112115
06FEB2012:04:00:00 112115
27MAR2012:14:00:00 112115
03MAR2015:21:00:00 112115
20JAN2016:13:00:00 112115
05AUG2016:22:00:00 112115
01FEB2019:20:00:00 112115
21AUG2018:23:00:00 112122
26FEB2013:00:00:00 112123
29APR2014:17:00:00 112123
12MAY2016:22:00:00 112123
19OCT2018:21:00:00 112123
15MAY2003:09:00:00 112124
19NOV2003:05:00:00 112124
05JUN2009:12:00:00 112124
02SEP2016:01:00:00 112124
06JUN2006:15:00:00 112125
15JAN2016:21:00:00 112125
08FEB2017:08:00:00 112125
18MAY2018:22:00:00 112125
09MAY2019:19:00:00 112125
05APR2004:17:00:00 112132
09JAN2009:04:00:00 112132
17OCT2012:04:00:00 112132
26JUL2018:23:00:00 112132
02AUG2018:19:00:00 112145
12AUG2016:19:00:00 112213
25FEB2014:22:00:00 112214
08AUG2019:23:00:00 112214
02DEC2013:13:00:00 112215
30APR2014:05:00:00 112215
18SEP2014:00:00:00 112215
05MAY2016:13:00:00 112215
24FEB2017:21:00:00 112215
09JUN2017:02:00:00 112215
07MAR2019:18:00:00 112215
04NOV2019:20:00:00 112215
25MAR2009:19:00:00 112223
09FEB2010:21:00:00 112223
24MAY2011:00:00:00 112223
09NOV2011:05:00:00 112223
13APR2012:07:00:00 112223
05MAY2015:22:00:00 112223
15AUG2016:05:00:00 112223
;
data B;
set A;
call streaminit(1);
start = datetime;
do i = 0 to 59;
datetime = intnx('second',start,i * 60);
temperature = rand("Uniform") * 10;
output;
end;
drop i nr start;
run;
data want;
set b;
if _n_ = 1
then do;
length nr 8;
declare hash a (dataset:"a");
a.definekey('datetime');
a.definedata('nr');
a.definedone();
nr = .;
end;
do datetime = datetime - 600 to datetime by 60;
if a.find() = 0 then output;
end;
run;
Depending on the granularity of your datetimes, you may have to change the DO loop. If you have fractions of seconds, I recommend to remove those first.
Works, and at the beginning it is ok, but please make three changes.
1. first, it's a _n_ search, not a datetime search.
In the original set B, a few observations may be missing (no temperature measurement) and I want the subsets of Table B to be equal.
in my code:
set A;
if _n_ = &i then do;
call symput("datetime" , datetime);
call symput("Nr" , Nr);
end;
run;
data _null_;
set B;
if datetime = &datetime then do;
call symput("nrdb" , _n_);
end;
run;
data B_temp;
set B;
if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
datetime = INTNX('second',datetime,- 60);
run;
2. secondly, it is transposed every 10 observations (depends on the sampling range). in the garden set B there is still a variable pressure.
data B_temp;
do i = 1 to 10;
set B_temp;
array temperature_ temperature_1 - temperature_10;
temperature_[i] = temperature;
end;
drop i;
run;
3. third, dividing table B into separate tables depending on the variable "Nr"
Well, I have something but maybe it can be easier
data B;
set B;
nrdb = _n_;
run;
data aa;
set a;
if _n_ = 1
then do;
declare hash b (dataset:"b");
b.definekey('datetime');
b.definedata('nrdb');
b.definedone();
end;
if b.find() = 0 then nrdb = nrdb;
run;
data want;
set b;
if _n_ = 1
then do;
length nr 8;
declare hash aa (dataset:"aa");
aa.definekey('nrdb');
aa.definedata('nr');
aa.definedone();
nr = .;
end;
do nrdb = nrdb - 9 to nrdb by 1;
if aa.find() = 0 then output;
end;
run;
Update codes. Only macro changes.
data A;
informat datetime datetime21.;
input datetime Nr ;
format datetime datetime21.;
cards;
01NOV2012:20:00:00 112113
19DEC2013:00:00:00 112113
29OCT2014:23:00:00 112113
07DEC2017:23:00:00 112113
28JAN2004:23:00:00 112114
28MAR2006:23:00:00 112114
19OCT2011:00:00:00 112114
16DEC2013:19:00:00 112114
23SEP2014:19:00:00 112114
11FEB2019:19:00:00 112114
11MAR2008:17:00:00 112115
15MAY2009:20:00:00 112115
10MAR2011:05:00:00 112115
06FEB2012:04:00:00 112115
27MAR2012:14:00:00 112115
03MAR2015:21:00:00 112115
20JAN2016:13:00:00 112115
05AUG2016:22:00:00 112115
01FEB2019:20:00:00 112115
21AUG2018:23:00:00 112122
26FEB2013:00:00:00 112123
29APR2014:17:00:00 112123
12MAY2016:22:00:00 112123
19OCT2018:21:00:00 112123
15MAY2003:09:00:00 112124
19NOV2003:05:00:00 112124
05JUN2009:12:00:00 112124
02SEP2016:01:00:00 112124
06JUN2006:15:00:00 112125
15JAN2016:21:00:00 112125
08FEB2017:08:00:00 112125
18MAY2018:22:00:00 112125
09MAY2019:19:00:00 112125
05APR2004:17:00:00 112132
09JAN2009:04:00:00 112132
17OCT2012:04:00:00 112132
26JUL2018:23:00:00 112132
02AUG2018:19:00:00 112145
12AUG2016:19:00:00 112213
25FEB2014:22:00:00 112214
08AUG2019:23:00:00 112214
02DEC2013:13:00:00 112215
30APR2014:05:00:00 112215
18SEP2014:00:00:00 112215
05MAY2016:13:00:00 112215
24FEB2017:21:00:00 112215
09JUN2017:02:00:00 112215
07MAR2019:18:00:00 112215
04NOV2019:20:00:00 112215
25MAR2009:19:00:00 112223
09FEB2010:21:00:00 112223
24MAY2011:00:00:00 112223
09NOV2011:05:00:00 112223
13APR2012:07:00:00 112223
05MAY2015:22:00:00 112223
15AUG2016:05:00:00 112223
run;
data B;
set A;
output;
do i = 1 to 59;
datetime = INTNX('second',datetime, 60);
output;
end;
drop i;
run;
data B;
set B ;
temperature = rand("Uniform") * 10;
drop Nr;
run;
%macro DataSetCreate();
%let dsid = %sysfunc(open(A));
%let obs = %sysfunc(attrn(&dsid,nlobs));
%let rc = %sysfunc(close(&dsid));
%do i = 1 %to &obs;
data _null_;
set A;
if _n_ = &i then do;
call symput("datetime" , datetime);
call symput("Nr" , Nr);
end;
run;
%let Nr = &Nr;
data _null_;
set B;
if datetime = &datetime then do;
call symput("nrdb" , _n_);
end;
run;
%if %sysfunc(exist(B_&Nr)) %then %do;
data B_temp;
set B;
if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
datetime = INTNX('second',datetime,- 60);
run;
data B_temp;
do i = 1 to 10;
set B_temp;
array temperature_ temperature_1 - temperature_10;
temperature_[i] = temperature;
end;
drop i;
run;
data B_&Nr;
set B_&Nr B_temp;
run;
%end; %else %do;
data B_&Nr;
set B;
if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
datetime = INTNX('second',datetime,- 60);
run;
data B_&Nr;
do i = 1 to 10;
set B_&Nr;
array temperature_ temperature_1 - temperature_10;
temperature_[i] = temperature;
end;
drop i;
run;
%end;
%end;
%mend DataSetCreate;
%DataSetCreate;
Ok. Again. Please explain what the logic behind the macro is?
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.