data have;
input session_id name : $12. time;
cards;
1 pd_load 12
1 entry_load 13
1 entry_start 12
1 pd_time 13
1 entry_time 9
1 entry_text .
1 pd_text .
1 pd_device .
2 pd_device .
2 pd_text .
2 pd_time 7
2 pd_load 11
3 pd_text .
3 pd_load 10
3 entry_start 4
3 entry_text .
;
proc sql;
create table want(drop=t1 t2) as
select a.session_id,a.name,t1,ifn(a.time=.,b.time,a.time)as time ,t2
from have a left join (select session_id,scan(name,1,'_') as t1,scan(name,-1,'_') as t2,time from have where scan(name,-1,'_') in ('start','load')) b
on a.session_id=b.session_id and scan(a.name,1,'_')=t1
group by a.session_id,a.name,t1
having max(t2)=t2;
quit;
Hi @lydiawawa
Alright, I think if my understanding is correct, the following should solve the problem
data have;
input session_id name : $50. time;
cards;
1 pd_login_load_time 12
1 pd_entry_confirm_load_time 13
1 pd_entry_confirm_start_time 12
1 pd_save_time 13
1 pd_entry_login_time 9
1 pd_entry_confirm_text 12
1 pd_login_text 12
1 pd_login_device 12
2 pd_confirm_device 11
2 pd_confirm_text 11
2 pd_login_time 7
2 pd_confirm_load_time 11
3 pd_login_text 10
3 pd_login_load_time 10
3 pd_entry_confirm_start_time 4
3 pd_entry_confirm_text 4
4 pd_HH_dash_lang_entry_ind 5
4 pd_HH_dash_load_time 5
5 pd_dob_close_browser_time 6
5 pd_dob_lang_entry_ind 23
5 pd_dob_lang_exit_ind 23
5 pd_dob_load_time 23
5 pd_dob_next_time 11
5 pd_home_lang_entry_ind 16
5 pd_home_lang_exit_ind 16
5 pd_home_lang_load_time 16
5 pd_home_lang_next_time 18
;
proc sort data=have out=_have;
by session_id descending time;
run;
data want;
if _n_=1 then do;
dcl hash H (multidata:'y') ;
h.definekey ("session_id") ;
h.definedata ("t1","ti") ;
h.definedone () ;
end;
do until(last.session_id);
set _have end=lr;
by session_id;
if time>. then
do;
k=findw(name,'load','_');
k1=findw(name,'start','_');
ti=time;
if k>0 then t1='load';
else if k1>0 then t1='start';
if sum(k,k1)>0 then rc=h.add() ;
end;
else if time=. then do;
do rc=h.find() by 0 while(rc=0);
if t1='start' then start=ti;
else if t1='load' then load=ti;
rc=h.find_next();
end;
time=Coalesce(start,load);
end;
output;
end;
keep session_id name time;
run;
And for some reason, I feel silly that somewhere I overlooked and didn't comprehend properly. Really have to work on my communication and comprehension skills.
Let me know
PS
1. No need to find patterns
2. We are going with session_id has keys
3. Populate non missings with start,load in hash object
4. Look up using session_id as key
5. Fetch the time
and we are done!
@lydiawawa Did the above work for you?
HI @lydiawawa Just had a look at your edited sample.
I can't spot "_sessionstart_time"
So for your sample, session we go with _load_time?
Hi @lydiawawa I followed your code to understand the split of the name that forms the group. I think t3 is perhaps not needed to accomplish what you want.
I made some minor adjustments to your code on noticing your comment "The code does not work on pd_login_lang_entry_ind and pd_login_lang_exit_ind instead of being missing, they took the value of
pd_respondent_load_time."
Please review your code that i adjusted below
data have;
input session_id name : $70. time;
rownum=_n_;
cards;
1 pd_confirm_resp_for_quest_adr_yes_ind_time 22.7
1 pd_confirm_next_time 23.7
1 pd_confirm_load_time 13.3
1 pd_confirm_lang_entry_ind .
1 pd_confirm_lang_exit_ind .
1 pd_confirm_close_browser_time 24
1 pd_login_resp_user_id_time 2.1
1 pd_login_login_time 12.2
1 pd_login_lang_entry_ind .
1 pd_login_lang_exit_ind .
1 pd_login_close_browser_time 36.8
1 pd_respondent_p_first_name_time 40.3
1 pd_respondent_p_middle_name_time 41.6
1 pd_respondent_p_last_name_time 35.8
1 pd_respondent_load_time 35.8
1 pd_respondent_lang_entry_ind .
1 pd_respondent_lang_exit_ind .
1 pd_respondent_closemodal_time 137.6
1 pd_respondent_close_browser_time 140
1 pd_respondent_backmodal_time 75
1 pd_verifyaddress_next_time 34.6
1 pd_verifyaddress_load_time 24.7
1 pd_verifyaddress_lang_entry_ind .
1 pd_verifyaddress_lang_exit_ind .
1 pd_verifyaddress_h_occ_yes_ind_time 33.7
1 pd_verifyaddress_close_browser_time 35.1
;
data have;
set have;
t1 = scan(strip(name), 2, '_');
t3 = scan(strip(name), 3, '_');
run;
proc sort data=have out=_have;
by session_id t1 descending time;
run;
data want;
set _have;
by session_id t1 descending time;
retain temp ;
if first.session_id or first.t1 then temp=.;
if (t3 = "load" or t3 = "sessionstart") and not missing(time) then temp=time;
else if missing(time) then time = temp;
run;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.