Solved: Count distinct time before each record

jovic92 · Posted 12-14-2017 05:08 AM

I have to distinct count for each record (line) how many datetime (when it's started have) have any combination (grouped by two columns) till that moment.

data have;
input id  opt  $   t_purchase   : datetime32. t_spent :datetime32.  bonus  usage ;
format  t_purchase  t_spent datetime32. ;
cards; 
1    a1  10NOV2017:12:02:00  10NOV2017:14:05:00   100     3 
1    a1  10NOV2017:15:45:00  10NOV2017:15:07:33   100     0  
1    a1  10NOV2017:12:02:00  10NOV2017:13:24:50   100     6 
2    a1  10NOV2017:13:54:00  10NOV2017:14:02:58   100     3 
1    a1  10NOV2017:19:15:00  10NOV2017:20:22:07   100    12    
2    a1  10NOV2017:13:54:00  10NOV2017:13:57:12   100     7 
;
run;

So, grouped by id and opt, count distinct t_purchase that is not newer than t_purchase of that line.

The desired output would be:

data have;
input id  opt  $   t_purchase   : datetime32. t_spent :datetime32.  bonus  usage count_t_purchase;
format  t_purchase  t_spent datetime32. ;
cards; 
1    a1  10NOV2017:12:02:00  10NOV2017:14:05:00   100     3    1
1    a1  10NOV2017:15:45:00  10NOV2017:15:07:33   100     0    2
1    a1  10NOV2017:12:02:00  10NOV2017:13:24:50   100     6    1
2    a1  10NOV2017:13:54:00  10NOV2017:14:02:58   100     3    1
1    a1  10NOV2017:19:15:00  10NOV2017:20:22:07   100    12    3
2    a1  10NOV2017:13:54:00  10NOV2017:13:57:12   100     7    1
;
run;

I have tried with the hash table

DATA
work.WANT;
if _n_=1 then do;
    if 0 then set WORK.HAVE(rename=(t_purchase=_t_purchase));
	declare hash h(dataset: 'WORK.HAVE(rename=(t_purchase=_t_purchase))', hashexp:20);
	h.definekey('id', 'opt', 't_spent');
	h.definedata('t_purchase');
	h.definedone();
	end;
set WORK.HAVE;
COUNT_purchase=0;
do i=MIN(t_purchase) to t_spent;
	if h.find(key:id, key:opt, key:i)=0 then COUNT_purchase+1;
end;
drop _t_purchase i;
run;

But I got a few error, one of them is that the t_purchase column does not exist in the table HAVE, but it is there.

Anyway, the hash table is not the only way. Any kind of solution and help is welcomed.

Thank you.

Ksharp · Posted 12-14-2017 07:40 AM

OK. How about this one .

data have;
input id  opt  $   t_purchase   : datetime32. t_spent :datetime32.  bonus  usage ;
format  t_purchase  t_spent datetime32. ;
cards; 
1    a1  10NOV2017:12:02:00  10NOV2017:14:05:00   100     3 
1    a1  10NOV2017:15:45:00  10NOV2017:15:07:33   100     0  
1    a1  10NOV2017:12:02:00  10NOV2017:13:24:50   100     6 
2    a1  10NOV2017:13:54:00  10NOV2017:14:02:58   100     3 
1    a1  10NOV2017:19:15:00  10NOV2017:20:22:07   100    12    
2    a1  10NOV2017:13:54:00  10NOV2017:13:57:12   100     7 
;
run;
proc sql;
create table temp as
 select *,min(t_purchase) as min
  from have
   group by id,opt;
quit;
DATA work.WANT;
if _n_=1 then do;
    if 0 then set WORK.temp;
	declare hash h(dataset: 'WORK.temp', hashexp:20);
	h.definekey('id', 'opt', 't_purchase');
	h.definedone();
	end;
set WORK.temp;
COUNT_purchase=0;
do i=MIN to t_purchase;
	if h.check(key:id, key:opt, key:i)=0 then COUNT_purchase+1;
end;
drop  i;
run;

View solution in original post

RW9 · Posted 12-14-2017 05:14 AM

Maybe something like:

proc sql;
  create table WANT as
  select  A.*,
          count(distinct cats(B.ID,B.OPT)) as COUNT_T_PURCHASE
  from    HAVE A
  left join HAVE B
  on      A.ID=B.ID
  and     A.OPT=B.OPT
  and     A.T_PURCHASE > B.T_PURCHASE;
quit;

I cant test at the moment, but basically join all data with date less on, then count it.

Ksharp · Posted 12-14-2017 07:40 AM

OK. How about this one .

data have;
input id  opt  $   t_purchase   : datetime32. t_spent :datetime32.  bonus  usage ;
format  t_purchase  t_spent datetime32. ;
cards; 
1    a1  10NOV2017:12:02:00  10NOV2017:14:05:00   100     3 
1    a1  10NOV2017:15:45:00  10NOV2017:15:07:33   100     0  
1    a1  10NOV2017:12:02:00  10NOV2017:13:24:50   100     6 
2    a1  10NOV2017:13:54:00  10NOV2017:14:02:58   100     3 
1    a1  10NOV2017:19:15:00  10NOV2017:20:22:07   100    12    
2    a1  10NOV2017:13:54:00  10NOV2017:13:57:12   100     7 
;
run;
proc sql;
create table temp as
 select *,min(t_purchase) as min
  from have
   group by id,opt;
quit;
DATA work.WANT;
if _n_=1 then do;
    if 0 then set WORK.temp;
	declare hash h(dataset: 'WORK.temp', hashexp:20);
	h.definekey('id', 'opt', 't_purchase');
	h.definedone();
	end;
set WORK.temp;
COUNT_purchase=0;
do i=MIN to t_purchase;
	if h.check(key:id, key:opt, key:i)=0 then COUNT_purchase+1;
end;
drop  i;
run;

jovic92 · Posted 01-29-2018 06:04 PM

Hi, I know it's been a while, but I must ask you for a favor if you could just write me a short explanation for each line of this hash function. Just u few words. Some of them I really don't get it.

I assume this is not by rules of community or appropriate, but I have no choice.

Thanks in advance and sorry for bothering you.

RW9 · Posted 01-30-2018 03:59 AM

Thats really the function of the documentation:

http://support.sas.com/documentation/cdl/en/lrcon/65287/HTML/default/viewer.htm#n1b4cbtmb049xtn1vh9x...

And there are a lot of papers out there on the subject:

http://www2.sas.com/proceedings/forum2008/029-2008.pdf

Ksharp · Posted 01-30-2018 07:26 AM

OK. The code is short .

/*************
here pick up the minimal value of t_purchase,
and data like :
id ...  min
1 ....  '01jan2010:10:10:02'dt
1 ....  '01jan2010:10:10:02'dt
1 ....  '01jan2010:10:10:02'dt

**************/
proc sql;
create table temp as
 select *,min(t_purchase) as min
  from have
   group by id,opt;
quit;



DATA work.WANT;
if _n_=1 then do;
    if 0 then set WORK.temp;
	declare hash h(dataset: 'WORK.temp', hashexp:20);

/* Here push table temp into Hash Table for querying later 
  key is ID OPT T_PURCHASE */
	h.definekey('id', 'opt', 't_purchase');
	h.definedone();
	end;
set WORK.temp;
COUNT_purchase=0;  /*for each obs, reset count number to be 0*/
do i=MIN to t_purchase; /*for each obs, do loop from min to current t_purchase,step is one second*/
  /* in Hash Table ,find whether there is an obs or not according to key id,opt,i(i.e. t_purchase)
     if find it,then count +1 */	
  if h.check(key:id, key:opt, key:i)=0 then COUNT_purchase+1;
end;
drop  i;
run;

Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Catch up on SAS Innovate 2026

Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Re: Count distinct time before each record

Catch up on SAS Innovate 2026

SAS Training: Just a Click Away