BookmarkSubscribeRSS Feed
makset
Obsidian | Level 7

I have a table "a" with the following data

 

data A;
informat datetime  datetime21.;
input info $7. datetime Nr ; 
format datetime  datetime21.;
cards;
GC_5_0	30APR2020:00:30:00	1
GC_5_2	29JAN2016:16:00:00	2
GC_50_4	24NOV2010:21:00:00	3
GC_5_1	28OCT2003:13:35:00	4
GC_8_1	01AUG2016:11:20:00	5
GC_5_3	09AUG2016:18:10:00	6
GC_10_2	06SEP2019:06:40:00	7
GC_5_0	14MAY2015:00:20:00	8
GC_15_2	27NOV2006:09:45:00	9
GC_5_2	06FEB2019:12:55:00	10
;

In this case, 3 big data sets: GC_5_0, GC_5_1, GC_5_2 (variable info from table A), each approximately 1 GB.

 

example:

 

data GC_5_0;
informat datetime  datetime21.;
input datetime	high_temp	low_temp	end_temp; 
format datetime  datetime21.;
cards;
30APR2020:00:00:00	31.3	25.6	29.4
30APR2020:00:00:30	29.9	28.4	29.2
30APR2020:00:01:00	29.5	28.9	29.2
30APR2020:00:01:30	29.5	28.6	29
30APR2020:00:02:00	30	28.8	29.9
30APR2020:00:02:30	30.3	29.5	29.9
30APR2020:00:03:00	30.3	29.2	29.9
30APR2020:00:03:30	30.3	29	30.1
30APR2020:00:04:00	30.2	29.3	29.3
30APR2020:00:04:30	30.6	29.4	29.4
30APR2020:00:05:00	30.8	29.6	29.6
30APR2020:00:05:30	31	30	30.3
30APR2020:00:06:00	31	29.7	30.2
30APR2020:00:06:30	31	29.7	30.5
30APR2020:00:07:00	30.4	29.4	29.6
30APR2020:00:07:30	30	27.6	28.4
30APR2020:00:08:00	29.6	28.1	28.9
30APR2020:00:08:30	30.2	28.6	29.4
30APR2020:00:09:00	30	28.9	29.3
30APR2020:00:09:30	30.5	28.4	28.4
30APR2020:00:10:00	31.7	30.9	31.2
30APR2020:00:10:30	32	30.6	30.6
30APR2020:00:11:00	31.4	29.7	30.8
30APR2020:00:11:30	31.1	29.3	29.8
30APR2020:00:12:00	29.4	28.6	29
30APR2020:00:12:30	29.4	28.2	29
30APR2020:00:13:00	30.4	28.7	30.4
30APR2020:00:13:30	31.7	29.5	30.8
30APR2020:00:14:00	33.8	31.8	33
30APR2020:00:14:30	33	30.5	31.2
30APR2020:00:15:00	31.2	30.4	30.4
30APR2020:00:15:30	30.5	30	30
30APR2020:00:16:00	30.6	29	30.4
30APR2020:00:16:30	30	28.9	29.7
30APR2020:00:17:00	29.9	29.3	29.5
30APR2020:00:17:30	30.6	29.3	29.3
30APR2020:00:18:00	30.2	29.3	29.3
30APR2020:00:18:30	29.6	28.5	28.5
30APR2020:00:19:00	29.6	27.9	28.4
30APR2020:00:19:30	29.4	28.5	28.5
30APR2020:00:20:30	28.8	28	28
30APR2020:00:21:00	31.1	24.2	31
30APR2020:00:21:30	31.1	29.6	31.1
30APR2020:00:22:00	30.7	29.7	30.2
30APR2020:00:22:30	30.4	29.7	29.7
30APR2020:00:23:00	30.4	29.5	30.3
30APR2020:00:23:30	30.5	29.4	30.5
30APR2020:00:24:00	31	29.1	30.6
30APR2020:00:24:30	30.4	29.3	30.4
30APR2020:00:25:00	31.9	29.9	30.6
30APR2020:00:25:30	31.6	30.2	31.2
30APR2020:00:26:00	31	29.6	29.6
30APR2020:00:26:30	30.6	28.4	28.4
30APR2020:00:27:00	30.7	29.7	30.4
30APR2020:00:27:30	30.8	29.7	30.4
30APR2020:00:28:00	31.2	30	30.7
30APR2020:00:29:00	31	29.5	30.1
30APR2020:00:29:30	30.4	29.4	29.7
30APR2020:00:30:00	30.3	28.7	30.3
30APR2020:00:30:30	30.1	29.6	30.1
30APR2020:00:31:00	30	28.6	28.7
30APR2020:00:31:30	29.9	28.1	28.1
30APR2020:00:32:00	30.3	29.6	29.6
30APR2020:00:32:30	30.7	29.2	29.8
30APR2020:00:33:00	29.4	28.5	28.5
30APR2020:00:33:30	29.7	28.7	29.2
30APR2020:00:34:00	30.2	28.9	30.2
30APR2020:00:34:30	29.8	29	29.8
30APR2020:00:35:00	30.4	28.8	29.6
30APR2020:00:35:30	30.2	29	29.6
30APR2020:00:36:00	30.1	29.3	29.5
30APR2020:00:36:30	30.4	28.6	29.6
30APR2020:00:37:00	30.7	29.4	30
30APR2020:00:37:30	30.9	29.2	29.2
30APR2020:00:38:00	30.2	28.8	29.6
30APR2020:00:38:30	29.8	28.9	29.3
30APR2020:00:39:00	29.6	28.7	28.7
30APR2020:00:40:00	30.7	28.6	30.7
30APR2020:00:40:30	30.9	29.3	30.4
30APR2020:00:41:00	30.5	29.5	30.2
30APR2020:00:41:30	30.6	28.6	29.9
30APR2020:00:42:00	31.3	29.4	31.3
30APR2020:00:42:30	30.9	29.3	29.9
30APR2020:00:43:00	30.1	28.9	29.7
30APR2020:00:43:30	30.5	28.8	30.5
30APR2020:00:44:00	30.2	28.4	28.4
30APR2020:00:44:30	30.2	28	29.5
30APR2020:00:45:00	30.4	29.3	29.3
30APR2020:00:45:30	30.7	28.9	30.2
30APR2020:00:46:00	30.1	28.8	29.9
30APR2020:00:46:30	29.6	28.8	29.3
30APR2020:00:47:00	29	26.6	27.8
30APR2020:00:47:30	27.4	26.4	27.4
30APR2020:00:48:00	29.8	27.3	29.2
30APR2020:00:48:30	29.9	29.2	29.2
30APR2020:00:49:00	29.9	28.8	28.8
30APR2020:00:49:30	30.3	28.7	29.3
30APR2020:00:50:00	30	28.6	29.2
30APR2020:00:51:00	29.4	28.8	29.4
30APR2020:00:51:30	29.9	28.5	29.9
run;


data GC_5_2;
informat datetime datetime21.;
input datetime high_temp low_temp end_temp;
format datetime datetime21.;
cards;
30APR2020:00:00:00 31.3 25.6 29.4
30APR2020:00:00:30 29.9 28.4 29.2
30APR2020:00:01:00 29.5 28.9 29.2
30APR2020:00:01:30 29.5 28.6 29
30APR2020:00:02:00 30 28.8 29.9
30APR2020:00:02:30 30.3 29.5 29.9
30APR2020:00:03:00 30.3 29.2 29.9
30APR2020:00:03:30 30.3 29 30.1
30APR2020:00:04:00 30.2 29.3 29.3
30APR2020:00:04:30 30.6 29.4 29.4
30APR2020:00:05:00 30.8 29.6 29.6
30APR2020:00:05:30 31 30 30.3
30APR2020:00:06:00 31 29.7 30.2
30APR2020:00:06:30 31 29.7 30.5
30APR2020:00:07:00 30.4 29.4 29.6
30APR2020:00:07:30 30 27.6 28.4
30APR2020:00:08:00 29.6 28.1 28.9
30APR2020:00:08:30 30.2 28.6 29.4
30APR2020:00:09:00 30 28.9 29.3
30APR2020:00:09:30 30.5 28.4 28.4
30APR2020:00:10:00 31.7 30.9 31.2
30APR2020:00:10:30 32 30.6 30.6
30APR2020:00:11:00 31.4 29.7 30.8
30APR2020:00:11:30 31.1 29.3 29.8
30APR2020:00:12:00 29.4 28.6 29
30APR2020:00:12:30 29.4 28.2 29
30APR2020:00:13:00 30.4 28.7 30.4
30APR2020:00:13:30 31.7 29.5 30.8
30APR2020:00:14:00 33.8 31.8 33
30APR2020:00:14:30 33 30.5 31.2
30APR2020:00:15:00 31.2 30.4 30.4
30APR2020:00:15:30 30.5 30 30
30APR2020:00:16:00 30.6 29 30.4
30APR2020:00:16:30 30 28.9 29.7
30APR2020:00:17:00 29.9 29.3 29.5
30APR2020:00:17:30 30.6 29.3 29.3
30APR2020:00:18:00 30.2 29.3 29.3
30APR2020:00:18:30 29.6 28.5 28.5
30APR2020:00:19:00 29.6 27.9 28.4
30APR2020:00:19:30 29.4 28.5 28.5
30APR2020:00:20:30 28.8 28 28
30APR2020:00:21:00 31.1 24.2 31
30APR2020:00:21:30 31.1 29.6 31.1
30APR2020:00:22:00 30.7 29.7 30.2
30APR2020:00:22:30 30.4 29.7 29.7
30APR2020:00:23:00 30.4 29.5 30.3
30APR2020:00:23:30 30.5 29.4 30.5
30APR2020:00:24:00 31 29.1 30.6
30APR2020:00:24:30 30.4 29.3 30.4
30APR2020:00:25:00 31.9 29.9 30.6
30APR2020:00:25:30 31.6 30.2 31.2
30APR2020:00:26:00 31 29.6 29.6
30APR2020:00:26:30 30.6 28.4 28.4
30APR2020:00:27:00 30.7 29.7 30.4
30APR2020:00:27:30 30.8 29.7 30.4
30APR2020:00:28:00 31.2 30 30.7
30APR2020:00:29:00 31 29.5 30.1
30APR2020:00:29:30 30.4 29.4 29.7
30APR2020:00:30:00 30.3 28.7 30.3
30APR2020:00:30:30 30.1 29.6 30.1
30APR2020:00:31:00 30 28.6 28.7
30APR2020:00:31:30 29.9 28.1 28.1
30APR2020:00:32:00 30.3 29.6 29.6
30APR2020:00:32:30 30.7 29.2 29.8
30APR2020:00:33:00 29.4 28.5 28.5
30APR2020:00:33:30 29.7 28.7 29.2
30APR2020:00:34:00 30.2 28.9 30.2
30APR2020:00:34:30 29.8 29 29.8
30APR2020:00:35:00 30.4 28.8 29.6
30APR2020:00:35:30 30.2 29 29.6
30APR2020:00:36:00 30.1 29.3 29.5
30APR2020:00:36:30 30.4 28.6 29.6
30APR2020:00:37:00 30.7 29.4 30
30APR2020:00:37:30 30.9 29.2 29.2
30APR2020:00:38:00 30.2 28.8 29.6
30APR2020:00:38:30 29.8 28.9 29.3
30APR2020:00:39:00 29.6 28.7 28.7
30APR2020:00:40:00 30.7 28.6 30.7
30APR2020:00:40:30 30.9 29.3 30.4
30APR2020:00:41:00 30.5 29.5 30.2
30APR2020:00:41:30 30.6 28.6 29.9
30APR2020:00:42:00 31.3 29.4 31.3
30APR2020:00:42:30 30.9 29.3 29.9
30APR2020:00:43:00 30.1 28.9 29.7
30APR2020:00:43:30 30.5 28.8 30.5
30APR2020:00:44:00 30.2 28.4 28.4
30APR2020:00:44:30 30.2 28 29.5
30APR2020:00:45:00 30.4 29.3 29.3
30APR2020:00:45:30 30.7 28.9 30.2
30APR2020:00:46:00 30.1 28.8 29.9
30APR2020:00:46:30 29.6 28.8 29.3
30APR2020:00:47:00 29 26.6 27.8
30APR2020:00:47:30 27.4 26.4 27.4
30APR2020:00:48:00 29.8 27.3 29.2
30APR2020:00:48:30 29.9 29.2 29.2
30APR2020:00:49:00 29.9 28.8 28.8
30APR2020:00:49:30 30.3 28.7 29.3
30APR2020:00:50:00 30 28.6 29.2
30APR2020:00:51:00 29.4 28.8 29.4
30APR2020:00:51:30 29.9 28.5 29.9
run;

I want to reach the following table

 

 

data wont; 
informat datetime  datetime21.;
input datetime	high_temp	low_temp	end_temp; 
format datetime  datetime21.;
cards;
/* from GC_5_0;/**/
30APR2020:00:19:30	29.4	28.5	28.5
30APR2020:00:20:30	28.8	28	28
30APR2020:00:21:00	31.1	24.2	31
30APR2020:00:21:30	31.1	29.6	31.1
30APR2020:00:22:00	30.7	29.7	30.2
30APR2020:00:22:30	30.4	29.7	29.7
30APR2020:00:23:00	30.4	29.5	30.3
30APR2020:00:23:30	30.5	29.4	30.5
30APR2020:00:24:00	31	29.1	30.6
30APR2020:00:24:30	30.4	29.3	30.4
30APR2020:00:25:00	31.9	29.9	30.6
30APR2020:00:25:30	31.6	30.2	31.2
30APR2020:00:26:00	31	29.6	29.6
30APR2020:00:26:30	30.6	28.4	28.4
30APR2020:00:27:00	30.7	29.7	30.4
30APR2020:00:27:30	30.8	29.7	30.4
30APR2020:00:28:00	31.2	30	30.7
30APR2020:00:29:00	31	29.5	30.1
30APR2020:00:29:30	30.4	29.4	29.7
30APR2020:00:30:00	30.3	28.7	30.3	/*<------ datetime from 'A'/**/
30APR2020:00:30:30	30.1	29.6	30.1
30APR2020:00:31:00	30	28.6	28.7
30APR2020:00:31:30	29.9	28.1	28.1
30APR2020:00:32:00	30.3	29.6	29.6
30APR2020:00:32:30	30.7	29.2	29.8
30APR2020:00:33:00	29.4	28.5	28.5
30APR2020:00:33:30	29.7	28.7	29.2
30APR2020:00:34:00	30.2	28.9	30.2
30APR2020:00:34:30	29.8	29	29.8
30APR2020:00:35:00	30.4	28.8	29.6
30APR2020:00:35:30	30.2	29	29.6
30APR2020:00:36:00	30.1	29.3	29.5
30APR2020:00:36:30	30.4	28.6	29.6
30APR2020:00:37:00	30.7	29.4	30
30APR2020:00:37:30	30.9	29.2	29.2
30APR2020:00:38:00	30.2	28.8	29.6
30APR2020:00:38:30	29.8	28.9	29.3
30APR2020:00:39:00	29.6	28.7	28.7
30APR2020:00:40:00	30.7	28.6	30.7
30APR2020:00:40:30	30.9	29.3	30.4
/* from GC_5_2;/**/
30APR2020:00:30:00	30.3	28.7	30.3
30APR2020:00:30:30	30.1	29.6	30.1
30APR2020:00:31:00	30	28.6	28.7
30APR2020:00:31:30	29.9	28.1	28.1
30APR2020:00:32:00	30.3	29.6	29.6
30APR2020:00:32:30	30.7	29.2	29.8
30APR2020:00:33:00	29.4	28.5	28.5
30APR2020:00:33:30	29.7	28.7	29.2
30APR2020:00:34:00	30.2	28.9	30.2
30APR2020:00:34:30	29.8	29	29.8
30APR2020:00:35:00	30.4	28.8	29.6
30APR2020:00:35:30	30.2	29	29.6
30APR2020:00:36:00	30.1	29.3	29.5
30APR2020:00:36:30	30.4	28.6	29.6
30APR2020:00:37:00	30.7	29.4	30
30APR2020:00:37:30	30.9	29.2	29.2
30APR2020:00:38:00	30.2	28.8	29.6
30APR2020:00:38:30	29.8	28.9	29.3
30APR2020:00:39:00	29.6	28.7	28.7
30APR2020:00:40:00	30.7	28.6	30.7	/*<------ datetime from 'A'/**/
30APR2020:00:40:30	30.9	29.3	30.4
30APR2020:00:41:00	30.5	29.5	30.2
30APR2020:00:41:30	30.6	28.6	29.9
30APR2020:00:42:00	31.3	29.4	31.3
30APR2020:00:42:30	30.9	29.3	29.9
30APR2020:00:43:00	30.1	28.9	29.7
30APR2020:00:43:30	30.5	28.8	30.5
30APR2020:00:44:00	30.2	28.4	28.4
30APR2020:00:44:30	30.2	28	29.5
30APR2020:00:45:00	30.4	29.3	29.3
30APR2020:00:45:30	30.7	28.9	30.2
30APR2020:00:46:00	30.1	28.8	29.9
30APR2020:00:46:30	29.6	28.8	29.3
30APR2020:00:47:00	29	26.6	27.8
30APR2020:00:47:30	27.4	26.4	27.4
30APR2020:00:48:00	29.8	27.3	29.2
30APR2020:00:48:30	29.9	29.2	29.2
30APR2020:00:49:00	29.9	28.8	28.8
30APR2020:00:49:30	30.3	28.7	29.3
30APR2020:00:50:00	30	28.6	29.2
/*and so on/**/
run;

Each subset has 40 observations in table 'wont' form table gc_5_0 gc_5_1 and so on (sometimes there is no temperature measurement therefore datetime - 600  < datetime < datetime + 600 It will not work).

Sometimes datetime may not be exactly from set 'A'

_n_ = 1 GC_5_0	30APR2020:00:30:00

then we look for

30APR2020:00:30:00 +- 30

Obviously table A is much bigger and there are more different sets in the variable 'info'.

I tried to do this with the hash table but it fails. And I can't handle it. I am asking for a quick way because the source tables exceed 1 GB.
I hope that I have presented the problem clearly and understandably.
Best regards and thank you in advance for your help.



 

 

 

 

2 REPLIES 2
Reeza
Super User
1GB isn't a large data set for SAS in most cases. What makes you think it is?
Post your has code if you're having issues.

Are you trying to get 40 observations or a window of time (30 minutes it seems) relative to the time in table 1? I think its the latter but that isn't clear to me.
makset
Obsidian | Level 7

@Reeza wrote:
1GB isn't a large data set for SAS in most cases. What makes you think it is?

A hundred files of 1GB is a lot, especially if the application is to work more or less in real time (10 seconds is a lot, 1 min is forever).


@Reeza wrote:
Are you trying to get 40 observations or a window of time (30 minutes it seems) relative to the time in table 1? I think its the latter but that isn't clear to me.

40 observation.


data A;
informat datetime  datetime21.;
input info $7. datetime Nr ; 
format datetime  datetime21.;
cards;
GC_5_0	30APR2020:00:30:00	1
GC_5_2	30APR2020:00:40:00	2
GC_50_4	24NOV2010:21:00:00	3
GC_5_1	28OCT2003:13:35:00	4
GC_8_1	01AUG2016:11:20:00	5
GC_5_3	09AUG2016:18:10:00	6
GC_10_2	06SEP2019:06:40:00	7
GC_5_0	30APR2020:00:39:30	8
GC_15_2	27NOV2006:09:45:00	9
GC_5_2	30APR2020:00:17:00	10
run;

data GC_5_0;
set GC_5_0;
nrdb = _n_;
run;

data GC_5_2;
set GC_5_2;
nrdb = _n_;
run;


%macro test();

*	for simplicity;
data AA;
set A;
if info = 'GC_5_0' | info = 'GC_5_2';
run;

	proc sort data =  AA  out =  AA_count nodupkey;
	key info;
	run;

*	Obs_count variable needed for the loop;
data _NULL_;
 if 0 then set AA_count nobs = n;
 call symputx('Obs_count',n);
 stop;
run;
%put no. of observations = &Obs_count;

*	I need to search by number of observations, not by datetime;
				data _null_;
				if _n_ = 1 then do;
				  declare hash H (multidata:"y");
				  H.definekey("info");
				  H.defineData(	"datetime","Nr");
				  H.definedone();
				end;
				  do until (last.info);
				  set AA;
				  by info;
				  H.add();
				  end;
				  H.output (dataset: catx("_", "AA_", info));
				  H.clear();
				run;

%do i = 1  %to &Obs_count %by 1;

*	extracting the source file name;
data _NULL_;
set AA_count;
if _n_ = &i then do;
call symputx('info',info);
end;
run;

%put &info;

		data aa__&info;
		set aa__&info;
		if _n_ = 1 then do;
		  declare hash H1 (dataset:"&info");
		  H1.definekey("datetime");
		  H1.definedata("nrdb");
		  H1.definedone();
		end;
		  if H1.find() = 0 then  nrdb = nrdb;
		run;

*	for simplicity, remove undiscovered data;
		data aa__&info;
		set aa__&info;
		if nrdb ne .;
		run;

		data wont_&i;
		set  aa__&info;
		if _n_ = 1
		then do;
		  declare hash H (dataset:"&info");
		  H.definekey("nrdb");
		  H.definedata("datetime","high_temp","low_temp","end_temp");
		  H.definedone();
		end;
		do nrdb = nrdb - 20 to nrdb + 19 by 1;
		  if H.find() = 0 then output;
			high_temp = high_temp;
			low_temp = low_temp;
			end_temp = end_temp;
		end;
		run;

*	merging partial tables into the final table;
	%if &i = 1 %then %do;

			data wont;
			set  wont_&i (drop = nrdb);
			run;

	%end;
	%else %do;

			data wont;
			set  wont wont_&i (drop = nrdb);
			run;
	%end;

%end;

	proc sort data =  wont  out =  wont;
	key nr;
	run;

%mend test;
%test;

Maybe it can be done faster and smarter.

Best regards

 

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 2 replies
  • 700 views
  • 0 likes
  • 2 in conversation