BookmarkSubscribeRSS Feed
makset
Obsidian | Level 7

Hello

Two days ago I discovered the hash method and I see great potential in it, but now I don't have time to learn and I need a solution. Could some good guy optimize the following code (create B table and especially macro DataSetCreate) for the hash table.

 

I have an intuition that it can be included in one quick procedure.

 

Thank you in advance for your help


data A;
informat datetime  datetime21.;
input datetime Nr ; 
format datetime  datetime21.;
cards;
01NOV2012:20:00:00	112113
19DEC2013:00:00:00	112113
29OCT2014:23:00:00	112113
07DEC2017:23:00:00	112113
28JAN2004:23:00:00	112114
28MAR2006:23:00:00	112114
19OCT2011:00:00:00	112114
16DEC2013:19:00:00	112114
23SEP2014:19:00:00	112114
11FEB2019:19:00:00	112114
11MAR2008:17:00:00	112115
15MAY2009:20:00:00	112115
10MAR2011:05:00:00	112115
06FEB2012:04:00:00	112115
27MAR2012:14:00:00	112115
03MAR2015:21:00:00	112115
20JAN2016:13:00:00	112115
05AUG2016:22:00:00	112115
01FEB2019:20:00:00	112115
21AUG2018:23:00:00	112122
26FEB2013:00:00:00	112123
29APR2014:17:00:00	112123
12MAY2016:22:00:00	112123
19OCT2018:21:00:00	112123
15MAY2003:09:00:00	112124
19NOV2003:05:00:00	112124
05JUN2009:12:00:00	112124
02SEP2016:01:00:00	112124
06JUN2006:15:00:00	112125
15JAN2016:21:00:00	112125
08FEB2017:08:00:00	112125
18MAY2018:22:00:00	112125
09MAY2019:19:00:00	112125
05APR2004:17:00:00	112132
09JAN2009:04:00:00	112132
17OCT2012:04:00:00	112132
26JUL2018:23:00:00	112132
02AUG2018:19:00:00	112145
12AUG2016:19:00:00	112213
25FEB2014:22:00:00	112214
08AUG2019:23:00:00	112214
02DEC2013:13:00:00	112215
30APR2014:05:00:00	112215
18SEP2014:00:00:00	112215
05MAY2016:13:00:00	112215
24FEB2017:21:00:00	112215
09JUN2017:02:00:00	112215
07MAR2019:18:00:00	112215
04NOV2019:20:00:00	112215
25MAR2009:19:00:00	112223
09FEB2010:21:00:00	112223
24MAY2011:00:00:00	112223
09NOV2011:05:00:00	112223
13APR2012:07:00:00	112223
05MAY2015:22:00:00	112223
15AUG2016:05:00:00	112223
run;



data B;
set A;
output;
do i = 1 to 59;	
datetime = INTNX('second',datetime, 60);
output;
end;
drop i;
run;

data B;
set B ;
temp = rand("Uniform") * 10;
drop Nr;
run;

%macro DataSetCreate();

%let dsid = %sysfunc(open(A));
%let obs = %sysfunc(attrn(&dsid,nlobs));
%let rc = %sysfunc(close(&dsid));

%do i = 1 %to &obs; 	

	data  _null_;
	set A;
		if _n_ = &i then do;
		call symput("datetime" , datetime);
		call symput("Nr" , Nr);
		end;
	run;
		%let Nr = &Nr;

		data _null_;
		set B;
		if datetime = &datetime then do; 
		call symput("nrdb" , _n_);
		end;
		run;

			data B_&i._&Nr;
			set B;
			if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
			datetime = INTNX('second',datetime,- 60);
			run;

%end;
%mend DataSetCreate;

%DataSetCreate;

 

12 REPLIES 12
PeterClemmensen
Tourmaline | Level 20

Please give a description of what you want the B table to look like. And what you want the macro result to be?

makset
Obsidian | Level 7
data B_112113;
informat datetime  datetime21.;
input datetime temperature	temperature_1	temperature_2	temperature_3	temperature_4	temperature_5	temperature_6	temperature_7	temperature_8	temperature_9	temperature_10 ; 
format datetime  datetime21.;
cards;
01NOV2012:20:09:00	5.3236033372	8.576786397	5.4113848088	9.7418741253	3.5913227149	5.6276161131	5.2907386329	7.9579330469	8.6438692524	8.9479117864	5.3236033372
19DEC2013:00:09:00	0.0988801033	1.9708661479	0.923279433	8.9369205316	6.3919116347	9.3058412173	6.9498889288	5.8367105178	5.4225223232	1.6780249448	0.0988801033
29OCT2014:23:09:00	1.0845115222	5.6013268162	7.8389588278	7.2490334092	4.9445350212	1.068029434	0.6934379367	8.5052750329	8.8292096113	2.0018741512	1.0845115222
07DEC2017:23:09:00	3.6536939186	0.6068207975	0.4270497942	5.0366269541	5.1472254982	1.0142804869	7.4558170699	0.2896813233	6.1450612033	8.0979537475	3.6536939186
run;
makset
Obsidian | Level 7
Sorry i thought i wrote this macro quite simply.
Table A has 56 observations. The loop has 56 iterations. With each iteration 'datetime' and 'Nr' is taken. Next, we look up the _n_ from table B that matches the 'datetime' variable. We create a table named B_Nr. 'Nr' is the variable from table A. Then table B_Nr is transposed. More elements are added to the B_Nr table until 'Nr' will change
Kurt_Bremser
Super User

Let's try it another way:

B seems to be your main dataset, and A is used to specify a subset. Is that correct? And you want to apply a value from A to observations within a certain range of the datetime of A?

makset
Obsidian | Level 7

@Kurt_Bremser wrote:

Let's try it another way:

B seems to be your main dataset, and A is used to specify a subset. Is that correct? And you want to apply a value from A to observations within a certain range of the datetime of A?


And you want to apply a value from A to observations within a certain range of the datetime of --> B  <--?

 

Yes exactly. Thank you for your interest and trying to help
Kurt_Bremser
Super User

Try this:

data A;
informat datetime  datetime21.;
input datetime Nr ; 
format datetime  datetime21.;
cards;
01NOV2012:20:00:00  112113
19DEC2013:00:00:00  112113
29OCT2014:23:00:00  112113
07DEC2017:23:00:00  112113
28JAN2004:23:00:00  112114
28MAR2006:23:00:00  112114
19OCT2011:00:00:00  112114
16DEC2013:19:00:00  112114
23SEP2014:19:00:00  112114
11FEB2019:19:00:00  112114
11MAR2008:17:00:00  112115
15MAY2009:20:00:00  112115
10MAR2011:05:00:00  112115
06FEB2012:04:00:00  112115
27MAR2012:14:00:00  112115
03MAR2015:21:00:00  112115
20JAN2016:13:00:00  112115
05AUG2016:22:00:00  112115
01FEB2019:20:00:00  112115
21AUG2018:23:00:00  112122
26FEB2013:00:00:00  112123
29APR2014:17:00:00  112123
12MAY2016:22:00:00  112123
19OCT2018:21:00:00  112123
15MAY2003:09:00:00  112124
19NOV2003:05:00:00  112124
05JUN2009:12:00:00  112124
02SEP2016:01:00:00  112124
06JUN2006:15:00:00  112125
15JAN2016:21:00:00  112125
08FEB2017:08:00:00  112125
18MAY2018:22:00:00  112125
09MAY2019:19:00:00  112125
05APR2004:17:00:00  112132
09JAN2009:04:00:00  112132
17OCT2012:04:00:00  112132
26JUL2018:23:00:00  112132
02AUG2018:19:00:00  112145
12AUG2016:19:00:00  112213
25FEB2014:22:00:00  112214
08AUG2019:23:00:00  112214
02DEC2013:13:00:00  112215
30APR2014:05:00:00  112215
18SEP2014:00:00:00  112215
05MAY2016:13:00:00  112215
24FEB2017:21:00:00  112215
09JUN2017:02:00:00  112215
07MAR2019:18:00:00  112215
04NOV2019:20:00:00  112215
25MAR2009:19:00:00  112223
09FEB2010:21:00:00  112223
24MAY2011:00:00:00  112223
09NOV2011:05:00:00  112223
13APR2012:07:00:00  112223
05MAY2015:22:00:00  112223
15AUG2016:05:00:00  112223
;

data B;
set A;
call streaminit(1);
start = datetime;
do i = 0 to 59; 
  datetime = intnx('second',start,i * 60);
  temperature = rand("Uniform") * 10;
  output;
end;
drop i nr start;
run;

data want;
set b;
if _n_ = 1
then do;
  length nr 8;
  declare hash a (dataset:"a");
  a.definekey('datetime');
  a.definedata('nr');
  a.definedone();
  nr = .;
end;
do datetime = datetime - 600 to datetime by 60;
  if a.find() = 0 then output;
end;
run;

Depending on the granularity of your datetimes, you may have to change the DO loop. If you have fractions of seconds, I recommend to remove those first.

makset
Obsidian | Level 7
Thank you for your help.

Works, and at the beginning it is ok, but please make three changes.

1. first, it's a _n_ search, not a datetime search.
In the original set B, a few observations may be missing (no temperature measurement) and I want the subsets of Table B to be equal.

in my code:

	set A;
		if _n_ = &i then do;
		call symput("datetime" , datetime);
		call symput("Nr" , Nr);
		end;
	run;
		data _null_;
		set B;
		if datetime = &datetime then do; 
		call symput("nrdb" , _n_);
		end;
		run;
			data B_temp;
			set B;
			if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
			datetime = INTNX('second',datetime,- 60);
			run;

2. secondly, it is transposed every 10 observations (depends on the sampling range). in the garden set B there is still a variable pressure.

							data B_temp;
							  do i = 1 to 10;
								set  B_temp;
								    array temperature_	temperature_1	-	temperature_10;
								    temperature_[i] = temperature;
							  end;
							  drop i;
							run;

3. third, dividing table B into separate tables depending on the variable "Nr"

makset
Obsidian | Level 7

Well, I have something but maybe it can be easier

data B;
set B;
nrdb = _n_;
run;

data aa;
set a;
if _n_ = 1
then do;
  declare hash b (dataset:"b");
  b.definekey('datetime');
  b.definedata('nrdb');
  b.definedone();
end;
  if b.find() = 0 then  nrdb = nrdb;
run;

data want;
set b;
if _n_ = 1
then do;
  length nr 8;
  declare hash aa (dataset:"aa");
  aa.definekey('nrdb');
  aa.definedata('nr');
  aa.definedone();
  nr = .;
end;
do nrdb = nrdb - 9 to nrdb by 1;
  if aa.find() = 0 then output;
end;
run;
makset
Obsidian | Level 7

Update codes. Only macro changes.

Sorry for the mistake.
 
data A;
informat datetime  datetime21.;
input datetime Nr ; 
format datetime  datetime21.;
cards;
01NOV2012:20:00:00	112113
19DEC2013:00:00:00	112113
29OCT2014:23:00:00	112113
07DEC2017:23:00:00	112113
28JAN2004:23:00:00	112114
28MAR2006:23:00:00	112114
19OCT2011:00:00:00	112114
16DEC2013:19:00:00	112114
23SEP2014:19:00:00	112114
11FEB2019:19:00:00	112114
11MAR2008:17:00:00	112115
15MAY2009:20:00:00	112115
10MAR2011:05:00:00	112115
06FEB2012:04:00:00	112115
27MAR2012:14:00:00	112115
03MAR2015:21:00:00	112115
20JAN2016:13:00:00	112115
05AUG2016:22:00:00	112115
01FEB2019:20:00:00	112115
21AUG2018:23:00:00	112122
26FEB2013:00:00:00	112123
29APR2014:17:00:00	112123
12MAY2016:22:00:00	112123
19OCT2018:21:00:00	112123
15MAY2003:09:00:00	112124
19NOV2003:05:00:00	112124
05JUN2009:12:00:00	112124
02SEP2016:01:00:00	112124
06JUN2006:15:00:00	112125
15JAN2016:21:00:00	112125
08FEB2017:08:00:00	112125
18MAY2018:22:00:00	112125
09MAY2019:19:00:00	112125
05APR2004:17:00:00	112132
09JAN2009:04:00:00	112132
17OCT2012:04:00:00	112132
26JUL2018:23:00:00	112132
02AUG2018:19:00:00	112145
12AUG2016:19:00:00	112213
25FEB2014:22:00:00	112214
08AUG2019:23:00:00	112214
02DEC2013:13:00:00	112215
30APR2014:05:00:00	112215
18SEP2014:00:00:00	112215
05MAY2016:13:00:00	112215
24FEB2017:21:00:00	112215
09JUN2017:02:00:00	112215
07MAR2019:18:00:00	112215
04NOV2019:20:00:00	112215
25MAR2009:19:00:00	112223
09FEB2010:21:00:00	112223
24MAY2011:00:00:00	112223
09NOV2011:05:00:00	112223
13APR2012:07:00:00	112223
05MAY2015:22:00:00	112223
15AUG2016:05:00:00	112223
run;



data B;
set A;
output;
do i = 1 to 59;	
datetime = INTNX('second',datetime, 60);
output;
end;
drop i;
run;

data B;
set B ;
temperature = rand("Uniform") * 10;
drop Nr;
run;

%macro DataSetCreate();

%let dsid = %sysfunc(open(A));
%let obs = %sysfunc(attrn(&dsid,nlobs));
%let rc = %sysfunc(close(&dsid));

%do i = 1 %to &obs; 	

	data  _null_;
	set A;
		if _n_ = &i then do;
		call symput("datetime" , datetime);
		call symput("Nr" , Nr);
		end;
	run;
		%let Nr = &Nr;

		data _null_;
		set B;
		if datetime = &datetime then do; 
		call symput("nrdb" , _n_);
		end;
		run;

		%if %sysfunc(exist(B_&Nr))  %then %do;

			data B_temp;
			set B;
			if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
			datetime = INTNX('second',datetime,- 60);
			run;

							data B_temp;
							  do i = 1 to 10;
								set  B_temp;
								    array temperature_	temperature_1	-	temperature_10;
								    temperature_[i] = temperature;
							  end;
							  drop i;
							run;

			data B_&Nr;
			set B_&Nr B_temp;
			run;


		%end; %else %do;

			data B_&Nr;
			set B;
			if (&nrdb + 1) <= _n_ <= (&nrdb + 10);
			datetime = INTNX('second',datetime,- 60);
			run;

							data B_&Nr;
							  do i = 1 to 10;
								set  B_&Nr;	
								    array temperature_	temperature_1	-	temperature_10;
								    temperature_[i] = temperature;
							  end;
							  drop i;
							run;

		%end;

%end;
%mend DataSetCreate;

%DataSetCreate;

 

PeterClemmensen
Tourmaline | Level 20

Ok. Again. Please explain what the logic behind the macro is?

makset
Obsidian | Level 7
Table A has 56 observations. The loop has 56 iterations. With each iteration 'datetime' and 'Nr' is taken. Next, we look up the _n_ from table B that matches the 'datetime' variable. We create a table named B_Nr. 'Nr' is the variable from table A. Then table B_Nr is transposed. More elements are added to the B_Nr table until 'Nr' will change
How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 12 replies
  • 2532 views
  • 0 likes
  • 3 in conversation