hello everybody
I have a little problem and appreciate if anyone could help me.
I have intraday data (Trading data of each second):
Sample of my data:
TRD_EVENT_DT | TRD_EVENT_TM | TRD_EVENT_MS | TRD_STCK_ID | TRD_STCK_CD | TRD_INSTR_NAME | TRD_STCK_GRP_ID_CD | TRD_PR | TRD_TUROVR | TRD_TRA_NR |
3/24/2008 | 12:19:35 | IRB5IKCO8751 | IKCQ1 | 187191011 | N2 | 1 | 100 | 122197901 | |
3/24/2008 | 12:28:01 | IRO1ALBZ0001 | ALBZ1 | والبر1 | N1 | 1537 | 10000 | 122198037 | |
3/24/2008 | 13:13:44 | IRO1ALBZ0001 | ALBZ1 | والبر1 | N1 | 1567 | 10 | 9999198173 | |
3/24/2008 | 12:20:38 | IRO1AZAB0001 | AZAB1 | فاذر1 | N2 | 683 | 10000 | 122197911 | |
3/24/2008 | 13:13:44 | IRO1AZAB0001 | AZAB1 | فاذر1 | N2 | 695 | 10 | 9999198167 | |
3/24/2008 | 09:14:57 | IRO1BALI0001 | BALI1 | وبوعلي1 | N2 | 850 | 9260 | 122197242 | |
3/24/2008 | 09:15:06 | IRO1BALI0001 | BALI1 | وبوعلي1 | N2 | 850 | 2000 | 122197245 | |
3/24/2008 | 09:15:14 | IRO1BALI0001 | BALI1 | وبوعلي1 | N2 | 850 | 10000 | 122197247 | |
3/24/2008 | 09:15:24 | IRO1BALI0001 | BALI1 | وبوعلي1 | N2 | 850 | 6000 | 122197249 | |
3/24/2008 | 09:29:27 | IRO1BALI0001 | BALI1 | وبوعلي1 | N2 | 850 | 10000 | 122197296 | |
3/24/2008 | 12:28:00 | IRO1BALI0001 | BALI1 | وبوعلي1 | N2 | 850 | 10000 | 122198036 | |
3/24/2008 | 12:28:07 | IRO1BALI0001 | BALI1 | وبوعلي1 | N2 | 850 | 10000 | 122198041 | |
3/24/2008 | 13:13:44 | IRO1BALI0001 | BALI1 | وبوعلي1 | N2 | 865 | 10 | 9999198133 |
Explanation of headers of some columns:
[TRD_EVENT_DT] Month / Day / Year
,[TRD_EVENT_TM] Time: hh:mm:ss
,[TRD_EVENT_MS] Millisecond
,[TRD_STCK_ID]
,[TRD_STCK_CD]
,[TRD_INSTR_NAME]
,[TRD_STCK_GRP_ID_CD]
,[TRD_PR] Stock Price
,[TRD_TUROVR] Turnover
,[TRD_TRA_NR]
First:
I want to categorize this data in half an hour periods:
First half an hour: 9:00 << Volume_1 < 9:30
Second half an hour: 9:30 << Volume_2 < 10:00
Third half an hour: 10:00 << Volume_3 < 10:30
Fourth half an hour: 10:30 << Volume_4 < 11
Fifth half an hour: 11:00 << Volume_5 < 11:30
Sixth half an hour: 11:30 << Volume_6 << 12
Second:
I want to normalize my data by below equation:
Volume = Stock Price * Turnover
Third:
I want to analyze this data by the regression specified below:
Normalized Volume of half an hour = intercept + dummy var_1 [First half an hou] + dummy var_2 [Socend half an hour] + dummy var_3 [Third half an hour] + dummy var_4 [Fourth half an hour] + dummy var_5 [Fifth half an hour] + dummy var_6 [Sixth half an hour] + Residual
My question is how can I do each step using SAS.
Thanks for your attention.
*Importing Data;
/*
PROC IMPORT DATAFILE= 'D:\SampleData\Table87.xls'
OUT= SampleData87
DBMS= XLS;
RUN;
*/
proc contents data= Sampledata87;
run;
***********************************
*STEP 1: ROUNDING TIME;
***********************************
;
data Sampledata87_RT;
set Sampledata87;
TRD_EVENT_TIME = INPUT(TRD_EVENT_TM,time16.);
TRD_EVENT_ROUNDED = ROUND(TRD_EVENT_TIME,'00:30't);
TRD_EVENT_ROUFOR = PUT(TRD_EVENT_ROUNDED,hhmm.);
***********************************
*STEP 2: CALCULATION Of VOLUME;
***********************************
;
CountedVOLUME = TRD_PR*TRD_TUROVR;
***********************************
*STEP 3: NORMALIZED VOLUME;
***********************************
;
*Denominator
/*Sort by TRD_STCK_CD and temporal variables.*/;
proc sort data=Sampledata87_RT out=Sampledata87_SumVol;
by TRD_EVENT_DT;
run;
/*Sum VOLUME until the last of each TRD_STCK_CD is reached.*/
data Sampledata87_SumVolSo;
set Sampledata87_SumVol;
by TRD_EVENT_DT
TRD_STCK_CD notsorted;
format TRD_STCK_CD $5.;
informat TRD_STCK_CD $5.;
retain tmp_volume_sum;
tmp_volume_sum + CountedVOLUME;
if last.TRD_STCK_CD then do;
DailyVolume = tmp_volume_sum;
call missing(tmp_volume_sum);
end;
drop tmp_:;
run;
*The numerator
/*Sum VOLUME until the last of each TRD_STCK_CD is reached.*/;
data Sampledata87_SumVolSo;
set Sampledata87_SumVolSo;
by TRD_EVENT_DT
TRD_STCK_CD
TRD_EVENT_ROUFOR notsorted;
retain tmp_intradayvolume_sum;
tmp_intradayvolume_sum + CountedVOLUME;
if last.TRD_EVENT_ROUFOR then do;
IntradayVolume = tmp_intradayvolume_sum;
call missing(tmp_intradayvolume_sum);
end;
drop tmp_:;
run;
* Another way for calculating daily volume based on data set;
/*
proc sql noprint;
create table sums as
select TRD_STCK_CD, TRD_EVENT_DT, sum(CountedVOLUME) as volume_sum
from Sampledata87_SumVolSo
group by TRD_STCK_CD, TRD_EVENT_DT;
create index TRD_STCK_CD on sums;
quit;
data Sampledata87_SumVolSo02;
set Sampledata87_SumVolSo;
by TRD_EVENT_DT
TRD_STCK_CD notsorted;
volume_sum = .;
if last.TRD_STCK_CD then
set sums key=TRD_STCK_CD;
run;
*/;
*Calculating Volume by Data set;
*Division for calculating of adjusted volume in approach 1;
proc sort data=sampledata87_sumvolso out=sampledata87_sumvolso;
by TRD_STCK_CD TRD_EVENT_DT;
run;
data sampledata87_adjvol;
do until(last.TRD_STCK_CD);
do until(last.TRD_EVENT_DT);
set sampledata87_sumvolso;
by TRD_STCK_CD TRD_EVENT_DT;
if first.TRD_STCK_CD then
n=0;
if first.TRD_EVENT_DT then
n+1;
if n>1 then
do;
if not missing(IntradayVolume) then
adjusted_volume=divide(IntradayVolume,temp);
else call missing(adjusted_volume);
end;
if last.TRD_EVENT_DT then
temp=dailyvolume;
output;
end;
end;
drop temp n;
run;
proc sort data = sampledata87_adjvol;
by TRD_EVENT_DT TRD_STCK_CD;
run;
***********************************
STEP 4: RUNNING REGRESSION
***********************************
;
*Generating Dummy Variable Automatically;
* Regressing dummy variables on normalized volume variable using calculated volume;
proc genmod data=Sampledata87_adjvol;
class TRD_EVENT_ROUFOR / param=effect;
model adjusted_volume = TRD_EVENT_ROUFOR / noscale;
ods select ParameterEstimates;
run;
/* Same analysis by using the CLASS statement */
proc glm data=Sampledata87_adjvol;
class TRD_EVENT_ROUFOR; /* Generates dummy variables internally */
model adjusted_volume = TRD_EVENT_ROUFOR / solution;
ods select ParameterEstimates;
quit;
Re: #1
Is it a SAS time? If so, because SAS stores time as numbers so you can use functions to round it, ie ROUND() or FLOOR(), or MOD().It would be slightly easier if you could just round but that would be 9:15 to 10:14 intervals rather than 9:00 to 9:30.
EDIT:
Re #3
Once your time is rounded, use PROC TRANSPOSE to flip your data and then use PROC REG. You can also look at the procedures under SAS/ETS if you're doing time series analysis. Especially PROC TIMESERIES, may help with #1 as well.
Hello,
Thanks for your attention;
A sample of my data has been attached to previous massege.
Format of my data is mdb (Access). Please instruct me how to use Round statement for rounding time.
round(time, 30) as time_rounded
You can do that within a SQL query to round the time.
*Importing Data;
/*
PROC IMPORT DATAFILE= 'D:\SampleData\Table87.xls'
OUT= SampleData87
DBMS= XLS;
RUN;
*/
proc contents data= Sampledata87;
run;
***********************************
*STEP 1: ROUNDING TIME;
***********************************
;
data Sampledata87_RT;
set Sampledata87;
TRD_EVENT_TIME = INPUT(TRD_EVENT_TM,time16.);
TRD_EVENT_ROUNDED = ROUND(TRD_EVENT_TIME,'00:30't);
TRD_EVENT_ROUFOR = PUT(TRD_EVENT_ROUNDED,hhmm.);
***********************************
*STEP 2: CALCULATION Of VOLUME;
***********************************
;
CountedVOLUME = TRD_PR*TRD_TUROVR;
***********************************
*STEP 3: NORMALIZED VOLUME;
***********************************
;
*Denominator
/*Sort by TRD_STCK_CD and temporal variables.*/;
proc sort data=Sampledata87_RT out=Sampledata87_SumVol;
by TRD_EVENT_DT;
run;
/*Sum VOLUME until the last of each TRD_STCK_CD is reached.*/
data Sampledata87_SumVolSo;
set Sampledata87_SumVol;
by TRD_EVENT_DT
TRD_STCK_CD notsorted;
format TRD_STCK_CD $5.;
informat TRD_STCK_CD $5.;
retain tmp_volume_sum;
tmp_volume_sum + CountedVOLUME;
if last.TRD_STCK_CD then do;
DailyVolume = tmp_volume_sum;
call missing(tmp_volume_sum);
end;
drop tmp_:;
run;
*The numerator
/*Sum VOLUME until the last of each TRD_STCK_CD is reached.*/;
data Sampledata87_SumVolSo;
set Sampledata87_SumVolSo;
by TRD_EVENT_DT
TRD_STCK_CD
TRD_EVENT_ROUFOR notsorted;
retain tmp_intradayvolume_sum;
tmp_intradayvolume_sum + CountedVOLUME;
if last.TRD_EVENT_ROUFOR then do;
IntradayVolume = tmp_intradayvolume_sum;
call missing(tmp_intradayvolume_sum);
end;
drop tmp_:;
run;
* Another way for calculating daily volume based on data set;
/*
proc sql noprint;
create table sums as
select TRD_STCK_CD, TRD_EVENT_DT, sum(CountedVOLUME) as volume_sum
from Sampledata87_SumVolSo
group by TRD_STCK_CD, TRD_EVENT_DT;
create index TRD_STCK_CD on sums;
quit;
data Sampledata87_SumVolSo02;
set Sampledata87_SumVolSo;
by TRD_EVENT_DT
TRD_STCK_CD notsorted;
volume_sum = .;
if last.TRD_STCK_CD then
set sums key=TRD_STCK_CD;
run;
*/;
*Calculating Volume by Data set;
*Division for calculating of adjusted volume in approach 1;
proc sort data=sampledata87_sumvolso out=sampledata87_sumvolso;
by TRD_STCK_CD TRD_EVENT_DT;
run;
data sampledata87_adjvol;
do until(last.TRD_STCK_CD);
do until(last.TRD_EVENT_DT);
set sampledata87_sumvolso;
by TRD_STCK_CD TRD_EVENT_DT;
if first.TRD_STCK_CD then
n=0;
if first.TRD_EVENT_DT then
n+1;
if n>1 then
do;
if not missing(IntradayVolume) then
adjusted_volume=divide(IntradayVolume,temp);
else call missing(adjusted_volume);
end;
if last.TRD_EVENT_DT then
temp=dailyvolume;
output;
end;
end;
drop temp n;
run;
proc sort data = sampledata87_adjvol;
by TRD_EVENT_DT TRD_STCK_CD;
run;
***********************************
STEP 4: RUNNING REGRESSION
***********************************
;
*Generating Dummy Variable Automatically;
* Regressing dummy variables on normalized volume variable using calculated volume;
proc genmod data=Sampledata87_adjvol;
class TRD_EVENT_ROUFOR / param=effect;
model adjusted_volume = TRD_EVENT_ROUFOR / noscale;
ods select ParameterEstimates;
run;
/* Same analysis by using the CLASS statement */
proc glm data=Sampledata87_adjvol;
class TRD_EVENT_ROUFOR; /* Generates dummy variables internally */
model adjusted_volume = TRD_EVENT_ROUFOR / solution;
ods select ParameterEstimates;
quit;
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.