BookmarkSubscribeRSS Feed
makset
Obsidian | Level 7

I have a table "a" with the following data

 

data A;
informat datetime  datetime21.;
input info $7. datetime Nr ; 
format datetime  datetime21.;
cards;
GC_5_0	30APR2020:00:30:00	1
GC_5_2	29JAN2016:16:00:00	2
GC_50_4	24NOV2010:21:00:00	3
GC_5_1	28OCT2003:13:35:00	4
GC_8_1	01AUG2016:11:20:00	5
GC_5_3	09AUG2016:18:10:00	6
GC_10_2	06SEP2019:06:40:00	7
GC_5_0	14MAY2015:00:20:00	8
GC_15_2	27NOV2006:09:45:00	9
GC_5_2	06FEB2019:12:55:00	10
;

In this case, 3 big data sets: GC_5_0, GC_5_1, GC_5_2 (variable info from table A), each approximately 1 GB.

 

example:

 

data GC_5_0;
informat datetime  datetime21.;
input datetime	high_temp	low_temp	end_temp; 
format datetime  datetime21.;
cards;
30APR2020:00:00:00	31.3	25.6	29.4
30APR2020:00:00:30	29.9	28.4	29.2
30APR2020:00:01:00	29.5	28.9	29.2
30APR2020:00:01:30	29.5	28.6	29
30APR2020:00:02:00	30	28.8	29.9
30APR2020:00:02:30	30.3	29.5	29.9
30APR2020:00:03:00	30.3	29.2	29.9
30APR2020:00:03:30	30.3	29	30.1
30APR2020:00:04:00	30.2	29.3	29.3
30APR2020:00:04:30	30.6	29.4	29.4
30APR2020:00:05:00	30.8	29.6	29.6
30APR2020:00:05:30	31	30	30.3
30APR2020:00:06:00	31	29.7	30.2
30APR2020:00:06:30	31	29.7	30.5
30APR2020:00:07:00	30.4	29.4	29.6
30APR2020:00:07:30	30	27.6	28.4
30APR2020:00:08:00	29.6	28.1	28.9
30APR2020:00:08:30	30.2	28.6	29.4
30APR2020:00:09:00	30	28.9	29.3
30APR2020:00:09:30	30.5	28.4	28.4
30APR2020:00:10:00	31.7	30.9	31.2
30APR2020:00:10:30	32	30.6	30.6
30APR2020:00:11:00	31.4	29.7	30.8
30APR2020:00:11:30	31.1	29.3	29.8
30APR2020:00:12:00	29.4	28.6	29
30APR2020:00:12:30	29.4	28.2	29
30APR2020:00:13:00	30.4	28.7	30.4
30APR2020:00:13:30	31.7	29.5	30.8
30APR2020:00:14:00	33.8	31.8	33
30APR2020:00:14:30	33	30.5	31.2
30APR2020:00:15:00	31.2	30.4	30.4
30APR2020:00:15:30	30.5	30	30
30APR2020:00:16:00	30.6	29	30.4
30APR2020:00:16:30	30	28.9	29.7
30APR2020:00:17:00	29.9	29.3	29.5
30APR2020:00:17:30	30.6	29.3	29.3
30APR2020:00:18:00	30.2	29.3	29.3
30APR2020:00:18:30	29.6	28.5	28.5
30APR2020:00:19:00	29.6	27.9	28.4
30APR2020:00:19:30	29.4	28.5	28.5
30APR2020:00:20:30	28.8	28	28
30APR2020:00:21:00	31.1	24.2	31
30APR2020:00:21:30	31.1	29.6	31.1
30APR2020:00:22:00	30.7	29.7	30.2
30APR2020:00:22:30	30.4	29.7	29.7
30APR2020:00:23:00	30.4	29.5	30.3
30APR2020:00:23:30	30.5	29.4	30.5
30APR2020:00:24:00	31	29.1	30.6
30APR2020:00:24:30	30.4	29.3	30.4
30APR2020:00:25:00	31.9	29.9	30.6
30APR2020:00:25:30	31.6	30.2	31.2
30APR2020:00:26:00	31	29.6	29.6
30APR2020:00:26:30	30.6	28.4	28.4
30APR2020:00:27:00	30.7	29.7	30.4
30APR2020:00:27:30	30.8	29.7	30.4
30APR2020:00:28:00	31.2	30	30.7
30APR2020:00:29:00	31	29.5	30.1
30APR2020:00:29:30	30.4	29.4	29.7
30APR2020:00:30:00	30.3	28.7	30.3
30APR2020:00:30:30	30.1	29.6	30.1
30APR2020:00:31:00	30	28.6	28.7
30APR2020:00:31:30	29.9	28.1	28.1
30APR2020:00:32:00	30.3	29.6	29.6
30APR2020:00:32:30	30.7	29.2	29.8
30APR2020:00:33:00	29.4	28.5	28.5
30APR2020:00:33:30	29.7	28.7	29.2
30APR2020:00:34:00	30.2	28.9	30.2
30APR2020:00:34:30	29.8	29	29.8
30APR2020:00:35:00	30.4	28.8	29.6
30APR2020:00:35:30	30.2	29	29.6
30APR2020:00:36:00	30.1	29.3	29.5
30APR2020:00:36:30	30.4	28.6	29.6
30APR2020:00:37:00	30.7	29.4	30
30APR2020:00:37:30	30.9	29.2	29.2
30APR2020:00:38:00	30.2	28.8	29.6
30APR2020:00:38:30	29.8	28.9	29.3
30APR2020:00:39:00	29.6	28.7	28.7
30APR2020:00:40:00	30.7	28.6	30.7
30APR2020:00:40:30	30.9	29.3	30.4
30APR2020:00:41:00	30.5	29.5	30.2
30APR2020:00:41:30	30.6	28.6	29.9
30APR2020:00:42:00	31.3	29.4	31.3
30APR2020:00:42:30	30.9	29.3	29.9
30APR2020:00:43:00	30.1	28.9	29.7
30APR2020:00:43:30	30.5	28.8	30.5
30APR2020:00:44:00	30.2	28.4	28.4
30APR2020:00:44:30	30.2	28	29.5
30APR2020:00:45:00	30.4	29.3	29.3
30APR2020:00:45:30	30.7	28.9	30.2
30APR2020:00:46:00	30.1	28.8	29.9
30APR2020:00:46:30	29.6	28.8	29.3
30APR2020:00:47:00	29	26.6	27.8
30APR2020:00:47:30	27.4	26.4	27.4
30APR2020:00:48:00	29.8	27.3	29.2
30APR2020:00:48:30	29.9	29.2	29.2
30APR2020:00:49:00	29.9	28.8	28.8
30APR2020:00:49:30	30.3	28.7	29.3
30APR2020:00:50:00	30	28.6	29.2
30APR2020:00:51:00	29.4	28.8	29.4
30APR2020:00:51:30	29.9	28.5	29.9
run;


data GC_5_2;
informat datetime datetime21.;
input datetime high_temp low_temp end_temp;
format datetime datetime21.;
cards;
30APR2020:00:00:00 31.3 25.6 29.4
30APR2020:00:00:30 29.9 28.4 29.2
30APR2020:00:01:00 29.5 28.9 29.2
30APR2020:00:01:30 29.5 28.6 29
30APR2020:00:02:00 30 28.8 29.9
30APR2020:00:02:30 30.3 29.5 29.9
30APR2020:00:03:00 30.3 29.2 29.9
30APR2020:00:03:30 30.3 29 30.1
30APR2020:00:04:00 30.2 29.3 29.3
30APR2020:00:04:30 30.6 29.4 29.4
30APR2020:00:05:00 30.8 29.6 29.6
30APR2020:00:05:30 31 30 30.3
30APR2020:00:06:00 31 29.7 30.2
30APR2020:00:06:30 31 29.7 30.5
30APR2020:00:07:00 30.4 29.4 29.6
30APR2020:00:07:30 30 27.6 28.4
30APR2020:00:08:00 29.6 28.1 28.9
30APR2020:00:08:30 30.2 28.6 29.4
30APR2020:00:09:00 30 28.9 29.3
30APR2020:00:09:30 30.5 28.4 28.4
30APR2020:00:10:00 31.7 30.9 31.2
30APR2020:00:10:30 32 30.6 30.6
30APR2020:00:11:00 31.4 29.7 30.8
30APR2020:00:11:30 31.1 29.3 29.8
30APR2020:00:12:00 29.4 28.6 29
30APR2020:00:12:30 29.4 28.2 29
30APR2020:00:13:00 30.4 28.7 30.4
30APR2020:00:13:30 31.7 29.5 30.8
30APR2020:00:14:00 33.8 31.8 33
30APR2020:00:14:30 33 30.5 31.2
30APR2020:00:15:00 31.2 30.4 30.4
30APR2020:00:15:30 30.5 30 30
30APR2020:00:16:00 30.6 29 30.4
30APR2020:00:16:30 30 28.9 29.7
30APR2020:00:17:00 29.9 29.3 29.5
30APR2020:00:17:30 30.6 29.3 29.3
30APR2020:00:18:00 30.2 29.3 29.3
30APR2020:00:18:30 29.6 28.5 28.5
30APR2020:00:19:00 29.6 27.9 28.4
30APR2020:00:19:30 29.4 28.5 28.5
30APR2020:00:20:30 28.8 28 28
30APR2020:00:21:00 31.1 24.2 31
30APR2020:00:21:30 31.1 29.6 31.1
30APR2020:00:22:00 30.7 29.7 30.2
30APR2020:00:22:30 30.4 29.7 29.7
30APR2020:00:23:00 30.4 29.5 30.3
30APR2020:00:23:30 30.5 29.4 30.5
30APR2020:00:24:00 31 29.1 30.6
30APR2020:00:24:30 30.4 29.3 30.4
30APR2020:00:25:00 31.9 29.9 30.6
30APR2020:00:25:30 31.6 30.2 31.2
30APR2020:00:26:00 31 29.6 29.6
30APR2020:00:26:30 30.6 28.4 28.4
30APR2020:00:27:00 30.7 29.7 30.4
30APR2020:00:27:30 30.8 29.7 30.4
30APR2020:00:28:00 31.2 30 30.7
30APR2020:00:29:00 31 29.5 30.1
30APR2020:00:29:30 30.4 29.4 29.7
30APR2020:00:30:00 30.3 28.7 30.3
30APR2020:00:30:30 30.1 29.6 30.1
30APR2020:00:31:00 30 28.6 28.7
30APR2020:00:31:30 29.9 28.1 28.1
30APR2020:00:32:00 30.3 29.6 29.6
30APR2020:00:32:30 30.7 29.2 29.8
30APR2020:00:33:00 29.4 28.5 28.5
30APR2020:00:33:30 29.7 28.7 29.2
30APR2020:00:34:00 30.2 28.9 30.2
30APR2020:00:34:30 29.8 29 29.8
30APR2020:00:35:00 30.4 28.8 29.6
30APR2020:00:35:30 30.2 29 29.6
30APR2020:00:36:00 30.1 29.3 29.5
30APR2020:00:36:30 30.4 28.6 29.6
30APR2020:00:37:00 30.7 29.4 30
30APR2020:00:37:30 30.9 29.2 29.2
30APR2020:00:38:00 30.2 28.8 29.6
30APR2020:00:38:30 29.8 28.9 29.3
30APR2020:00:39:00 29.6 28.7 28.7
30APR2020:00:40:00 30.7 28.6 30.7
30APR2020:00:40:30 30.9 29.3 30.4
30APR2020:00:41:00 30.5 29.5 30.2
30APR2020:00:41:30 30.6 28.6 29.9
30APR2020:00:42:00 31.3 29.4 31.3
30APR2020:00:42:30 30.9 29.3 29.9
30APR2020:00:43:00 30.1 28.9 29.7
30APR2020:00:43:30 30.5 28.8 30.5
30APR2020:00:44:00 30.2 28.4 28.4
30APR2020:00:44:30 30.2 28 29.5
30APR2020:00:45:00 30.4 29.3 29.3
30APR2020:00:45:30 30.7 28.9 30.2
30APR2020:00:46:00 30.1 28.8 29.9
30APR2020:00:46:30 29.6 28.8 29.3
30APR2020:00:47:00 29 26.6 27.8
30APR2020:00:47:30 27.4 26.4 27.4
30APR2020:00:48:00 29.8 27.3 29.2
30APR2020:00:48:30 29.9 29.2 29.2
30APR2020:00:49:00 29.9 28.8 28.8
30APR2020:00:49:30 30.3 28.7 29.3
30APR2020:00:50:00 30 28.6 29.2
30APR2020:00:51:00 29.4 28.8 29.4
30APR2020:00:51:30 29.9 28.5 29.9
run;

I want to reach the following table

 

 

data wont; 
informat datetime  datetime21.;
input datetime	high_temp	low_temp	end_temp; 
format datetime  datetime21.;
cards;
/* from GC_5_0;/**/
30APR2020:00:19:30	29.4	28.5	28.5
30APR2020:00:20:30	28.8	28	28
30APR2020:00:21:00	31.1	24.2	31
30APR2020:00:21:30	31.1	29.6	31.1
30APR2020:00:22:00	30.7	29.7	30.2
30APR2020:00:22:30	30.4	29.7	29.7
30APR2020:00:23:00	30.4	29.5	30.3
30APR2020:00:23:30	30.5	29.4	30.5
30APR2020:00:24:00	31	29.1	30.6
30APR2020:00:24:30	30.4	29.3	30.4
30APR2020:00:25:00	31.9	29.9	30.6
30APR2020:00:25:30	31.6	30.2	31.2
30APR2020:00:26:00	31	29.6	29.6
30APR2020:00:26:30	30.6	28.4	28.4
30APR2020:00:27:00	30.7	29.7	30.4
30APR2020:00:27:30	30.8	29.7	30.4
30APR2020:00:28:00	31.2	30	30.7
30APR2020:00:29:00	31	29.5	30.1
30APR2020:00:29:30	30.4	29.4	29.7
30APR2020:00:30:00	30.3	28.7	30.3	/*<------ datetime from 'A'/**/
30APR2020:00:30:30	30.1	29.6	30.1
30APR2020:00:31:00	30	28.6	28.7
30APR2020:00:31:30	29.9	28.1	28.1
30APR2020:00:32:00	30.3	29.6	29.6
30APR2020:00:32:30	30.7	29.2	29.8
30APR2020:00:33:00	29.4	28.5	28.5
30APR2020:00:33:30	29.7	28.7	29.2
30APR2020:00:34:00	30.2	28.9	30.2
30APR2020:00:34:30	29.8	29	29.8
30APR2020:00:35:00	30.4	28.8	29.6
30APR2020:00:35:30	30.2	29	29.6
30APR2020:00:36:00	30.1	29.3	29.5
30APR2020:00:36:30	30.4	28.6	29.6
30APR2020:00:37:00	30.7	29.4	30
30APR2020:00:37:30	30.9	29.2	29.2
30APR2020:00:38:00	30.2	28.8	29.6
30APR2020:00:38:30	29.8	28.9	29.3
30APR2020:00:39:00	29.6	28.7	28.7
30APR2020:00:40:00	30.7	28.6	30.7
30APR2020:00:40:30	30.9	29.3	30.4
/* from GC_5_2;/**/
30APR2020:00:30:00	30.3	28.7	30.3
30APR2020:00:30:30	30.1	29.6	30.1
30APR2020:00:31:00	30	28.6	28.7
30APR2020:00:31:30	29.9	28.1	28.1
30APR2020:00:32:00	30.3	29.6	29.6
30APR2020:00:32:30	30.7	29.2	29.8
30APR2020:00:33:00	29.4	28.5	28.5
30APR2020:00:33:30	29.7	28.7	29.2
30APR2020:00:34:00	30.2	28.9	30.2
30APR2020:00:34:30	29.8	29	29.8
30APR2020:00:35:00	30.4	28.8	29.6
30APR2020:00:35:30	30.2	29	29.6
30APR2020:00:36:00	30.1	29.3	29.5
30APR2020:00:36:30	30.4	28.6	29.6
30APR2020:00:37:00	30.7	29.4	30
30APR2020:00:37:30	30.9	29.2	29.2
30APR2020:00:38:00	30.2	28.8	29.6
30APR2020:00:38:30	29.8	28.9	29.3
30APR2020:00:39:00	29.6	28.7	28.7
30APR2020:00:40:00	30.7	28.6	30.7	/*<------ datetime from 'A'/**/
30APR2020:00:40:30	30.9	29.3	30.4
30APR2020:00:41:00	30.5	29.5	30.2
30APR2020:00:41:30	30.6	28.6	29.9
30APR2020:00:42:00	31.3	29.4	31.3
30APR2020:00:42:30	30.9	29.3	29.9
30APR2020:00:43:00	30.1	28.9	29.7
30APR2020:00:43:30	30.5	28.8	30.5
30APR2020:00:44:00	30.2	28.4	28.4
30APR2020:00:44:30	30.2	28	29.5
30APR2020:00:45:00	30.4	29.3	29.3
30APR2020:00:45:30	30.7	28.9	30.2
30APR2020:00:46:00	30.1	28.8	29.9
30APR2020:00:46:30	29.6	28.8	29.3
30APR2020:00:47:00	29	26.6	27.8
30APR2020:00:47:30	27.4	26.4	27.4
30APR2020:00:48:00	29.8	27.3	29.2
30APR2020:00:48:30	29.9	29.2	29.2
30APR2020:00:49:00	29.9	28.8	28.8
30APR2020:00:49:30	30.3	28.7	29.3
30APR2020:00:50:00	30	28.6	29.2
/*and so on/**/
run;

Each subset has 40 observations in table 'wont' form table gc_5_0 gc_5_1 and so on (sometimes there is no temperature measurement therefore datetime - 600  < datetime < datetime + 600 It will not work).

Sometimes datetime may not be exactly from set 'A'

_n_ = 1 GC_5_0	30APR2020:00:30:00

then we look for

30APR2020:00:30:00 +- 30

Obviously table A is much bigger and there are more different sets in the variable 'info'.

I tried to do this with the hash table but it fails. And I can't handle it. I am asking for a quick way because the source tables exceed 1 GB.
I hope that I have presented the problem clearly and understandably.
Best regards and thank you in advance for your help.



 

 

 

 

2 REPLIES 2
Reeza
Super User
1GB isn't a large data set for SAS in most cases. What makes you think it is?
Post your has code if you're having issues.

Are you trying to get 40 observations or a window of time (30 minutes it seems) relative to the time in table 1? I think its the latter but that isn't clear to me.
makset
Obsidian | Level 7

@Reeza wrote:
1GB isn't a large data set for SAS in most cases. What makes you think it is?

A hundred files of 1GB is a lot, especially if the application is to work more or less in real time (10 seconds is a lot, 1 min is forever).


@Reeza wrote:
Are you trying to get 40 observations or a window of time (30 minutes it seems) relative to the time in table 1? I think its the latter but that isn't clear to me.

40 observation.


data A;
informat datetime  datetime21.;
input info $7. datetime Nr ; 
format datetime  datetime21.;
cards;
GC_5_0	30APR2020:00:30:00	1
GC_5_2	30APR2020:00:40:00	2
GC_50_4	24NOV2010:21:00:00	3
GC_5_1	28OCT2003:13:35:00	4
GC_8_1	01AUG2016:11:20:00	5
GC_5_3	09AUG2016:18:10:00	6
GC_10_2	06SEP2019:06:40:00	7
GC_5_0	30APR2020:00:39:30	8
GC_15_2	27NOV2006:09:45:00	9
GC_5_2	30APR2020:00:17:00	10
run;

data GC_5_0;
set GC_5_0;
nrdb = _n_;
run;

data GC_5_2;
set GC_5_2;
nrdb = _n_;
run;


%macro test();

*	for simplicity;
data AA;
set A;
if info = 'GC_5_0' | info = 'GC_5_2';
run;

	proc sort data =  AA  out =  AA_count nodupkey;
	key info;
	run;

*	Obs_count variable needed for the loop;
data _NULL_;
 if 0 then set AA_count nobs = n;
 call symputx('Obs_count',n);
 stop;
run;
%put no. of observations = &Obs_count;

*	I need to search by number of observations, not by datetime;
				data _null_;
				if _n_ = 1 then do;
				  declare hash H (multidata:"y");
				  H.definekey("info");
				  H.defineData(	"datetime","Nr");
				  H.definedone();
				end;
				  do until (last.info);
				  set AA;
				  by info;
				  H.add();
				  end;
				  H.output (dataset: catx("_", "AA_", info));
				  H.clear();
				run;

%do i = 1  %to &Obs_count %by 1;

*	extracting the source file name;
data _NULL_;
set AA_count;
if _n_ = &i then do;
call symputx('info',info);
end;
run;

%put &info;

		data aa__&info;
		set aa__&info;
		if _n_ = 1 then do;
		  declare hash H1 (dataset:"&info");
		  H1.definekey("datetime");
		  H1.definedata("nrdb");
		  H1.definedone();
		end;
		  if H1.find() = 0 then  nrdb = nrdb;
		run;

*	for simplicity, remove undiscovered data;
		data aa__&info;
		set aa__&info;
		if nrdb ne .;
		run;

		data wont_&i;
		set  aa__&info;
		if _n_ = 1
		then do;
		  declare hash H (dataset:"&info");
		  H.definekey("nrdb");
		  H.definedata("datetime","high_temp","low_temp","end_temp");
		  H.definedone();
		end;
		do nrdb = nrdb - 20 to nrdb + 19 by 1;
		  if H.find() = 0 then output;
			high_temp = high_temp;
			low_temp = low_temp;
			end_temp = end_temp;
		end;
		run;

*	merging partial tables into the final table;
	%if &i = 1 %then %do;

			data wont;
			set  wont_&i (drop = nrdb);
			run;

	%end;
	%else %do;

			data wont;
			set  wont wont_&i (drop = nrdb);
			run;
	%end;

%end;

	proc sort data =  wont  out =  wont;
	key nr;
	run;

%mend test;
%test;

Maybe it can be done faster and smarter.

Best regards

 

hackathon24-white-horiz.png

The 2025 SAS Hackathon has begun!

It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.

Latest Updates

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 2 replies
  • 669 views
  • 0 likes
  • 2 in conversation