Dear all,
I have a data set grouped by ID and Transaction_Quarter as follows:
ID Trns_QTR Val_A Val_B
100 2010Q1 1456 345
100 2010Q2 2500 0
100 2010Q3 2500 0
100 2010Q4 2500 0
200 2010Q4 0 1200
200 2011Q1 3100 0
200 2011Q2 3200 0
200 2011Q3 3100 0
200 2011Q4 3300 0
200 2012Q1 2500 12876
300 2019Q4 2390 0
300 2010Q1 4300 0
300 2010Q2 2300 678
300 2010Q3 5600 0
300 2010Q4 5600 0
300 2011Q1 2500 0
300 2011Q2 3200 0
300 2011Q3 3100 0
300 2011Q4 3300 12876
300 2012Q1 3200 0
300 2012Q1 3100 0
300 2012Q2 3300 0
I would like to find:
1).- The number(s) of consecutive rows, in chronological order, within a by-group that Val_B = 0 (e.g. for ID 300 I will have three counts 2 , 5, 3) along with their respective and the beginning and ending Trns_QTR
e.g. 2, 2019Q4 - 2010Q1
5, 2010Q3 - 2011Q3
3, 2012Q1 - 2012Q2
2).- Create a Flag that will show whether the last Trns_QTR equals a given value i.e. 2012Q2 , of the latest "subgroup" (of consecutive zeros).
3).- Count the number of ID's that have count of consecutive Val_B, 0's GT 3 and its Trns_QTR equals a given value i.e. 2012Q2
Any feedback would be much appreciated
Thanking you in advance.
Best regards
Nikos
Data A;
  Input ID Trns_QTR:YYQ. Val_A Val_B;
  format Trns_Qtr YYQs6.;
  Datalines;
100 2010Q1 1456 345
100 2010Q2 2500 0
100 2010Q3 2500 0
100 2010Q4 2500 0
200 2010Q4 0 1200
200 2011Q1 3100 0
200 2011Q2 3200 0
200 2011Q3 3100 0
200 2011Q4 3300 0
200 2012Q1 2500 12876
300 2009Q4 2390 0
300 2010Q1 4300 0
300 2010Q2 2300 678
300 2010Q3 5600 0
300 2010Q4 5600 0
300 2011Q1 2500 0 
300 2011Q2 3200 0
300 2011Q3 3100 0
300 2011Q4 3300 12876
300 2012Q1 3200 0
300 2012Q1 3100 0
300 2012Q2 3300 0
;
Run;
data one;
 set a;
 by id Val_B notsorted;
 length range $ 20;
 retain first;
 if first.Val_B then do;first=Trns_QTR;call missing(Trns_QTR);count=0; end;
 count+1;
 if last.Val_B and Val_B=0 then do;range=catx('-',put(first,yyq.),put(Trns_QTR,yyq.));output;end;
 keep id range count;
run;
data two;
 set one;
 by id;
 if last.id and scan(range,-1,'-')='2012Q2' then flag=1;
  else flag=0;
run;
proc sql;
 create table three as
  select count(distinct id) as n
   from two
    where count gt 3 and  scan(range,-1,'-') eq '2012Q2';
quit;
Xia Keshan
I think this requires a multi-step approach. One answer to 1) could be:
Data A;
  Input ID Trns_QTR:YYQ. Val_A Val_B;
  format Trns_Qtr YYQs6.;
  Datalines;
100 2010Q1 1456 345
100 2010Q2 2500 0
100 2010Q3 2500 0
100 2010Q4 2500 0
200 2010Q4 0 1200
200 2011Q1 3100 0
200 2011Q2 3200 0
200 2011Q3 3100 0
200 2011Q4 3300 0
200 2012Q1 2500 12876
300 2019Q4 2390 0
300 2010Q1 4300 0
300 2010Q2 2300 678
300 2010Q3 5600 0
300 2010Q4 5600 0
300 2011Q1 2500 0 
300 2011Q2 3200 0
300 2011Q3 3100 0
300 2011Q4 3300 12876
300 2012Q1 3200 0
300 2012Q1 3100 0
300 2012Q2 3300 0
;
Run;
* create new ID;
Data B ;
  Length ID_dummy $100.;
  Set A;
  Retain dummy;
  By ID;
  If First.ID Then dummy=IfN(Val_B eq 0,1,0);
  Else Do;
    If Val_B eq 0 & dummy eq 0 Then dummy=1;
    Else If Val_B gt 0 Then dummy+1;
  End;
  If Val_B ne 0 Then Nr=0; Else Nr=dummy;
  If Nr ne 0;
  ID_dummy=Catt(Put(ID,Z3.),'_',Put(Nr,Z2.));
  One=1;
Run;
* get length;
Data C (Keep=ID Trns_Qtr Total ID_dummy);
  Do i=1 By 1 Until (Last.ID_dummy);
    Set B;
 By ID_dummy;
 Total=Sum(One,Total);
  End;
  Do Until (Last.ID_dummy);
    Set B;
 By ID_dummy;
 Output;
  End;
Run;
* save first and last;
Data D;
  Length Col $10.;
  Set C;
  By ID_dummy;
  If First.ID_dummy Then Do;
    Col="Start"; Output;
  End;
  Else If Last.ID_dummy Then Do;
    Col="End"; Output;
  End;
Run;
* transpose;
Proc Transpose Data=D Out=E (Drop=ID_dummy _NAME_);
  By ID_dummy ID Total;
  Var Trns_Qtr;
  ID Col;
Run;
Your not really asking a question there are you, more of a write my program for me ![]()
I note there is a logic error in your test data, the first record of 300 shows 2019 which sequentially would be after all the others??
For 1)
data have;
  infile datalines;
  input ID Trns_QTR yyq9. Val_A Val_B;
datalines;
300   2019Q4       2390       0
300   2010Q1       4300       0
300   2010Q2       2300       678
300   2010Q3       5600       0
300   2010Q4       5600       0
300   2011Q1       2500       0 
300   2011Q2       3200       0
300   2011Q3       3100       0
300   2011Q4       3300     12876
300   2012Q1       3200       0
300   2012Q1       3100       0
300   2012Q2       3300       0
;
run;
data inter;
  set have;
  by id;
  retain grp;
  if first.id then grp=1;
  if val_b ne 0 then grp=grp+1;
  if val_b=0 or last.id then sel=1;
run;
proc sql;
  create table WANT as
  select  ID,
          GRP,
          MIN(TRNS_QTR) as FIRST_DATE format=yyq9.,
          MAX(TRNS_QTR) as LAST_DATE format=yyq9.,
          COUNT(1) as N
  from    (select * from WORK.INTER where SEL=1)
  group by ID,
           GRP;
quit;
  
The above should give you a good start on 2 and 3.
Hi RW9,
You are right.
It should read 2009Q4 (instead of 2019Q4)
Thank you
data have;
infile cards;
length id 4 trns_qtr $6 val_a 8 val_b 8;
input id trns_qtr val_a val_b;
cards;
100 2010Q1 1456 345
100 2010Q2 2500 0
100 2010Q3 2500 0
100 2010Q4 2500 0
200 2010Q4 0 1200
200 2011Q1 3100 0
200 2011Q2 3200 0
200 2011Q3 3100 0
200 2011Q4 3300 0
200 2012Q1 2500 12876
300 2009Q4 2390 0
300 2010Q1 4300 0
300 2010Q2 2300 678
300 2010Q3 5600 0
300 2010Q4 5600 0
300 2011Q1 2500 0
300 2011Q2 3200 0
300 2011Q3 3100 0
300 2011Q4 3300 12876
300 2012Q1 3200 0
300 2012Q1 3100 0
300 2012Q2 3300 0
;
run;
%let compare_qtr=2012Q2;
/* solution of 1 & 2 */
data want1 (keep=id number startq endq flag);
set have;
by id;
retain startq endq number;
if first.id then number = .;
if val_b = 0
then do;
endq = trns_qtr;
if number = .
then do;
number = 1;
startq = trns_qtr;
end;
else number + 1;
end;
else do;
if number ne .
then do;
if endq = "&compare_qtr"
then flag = 1;
else flag = 0;
output;
number = .;
end;
end;
if last.id and number ne .
then do;
endq = trns_qtr;
if endq = "&compare_qtr"
then flag = 1;
else flag = 0;
output;
end;
run;
/* solution of 3 */
proc sql;
create table want2 as
select
count(distinct id) as id
from want1
where flag = 1 and number ge 3
;
quit;
Data A;
  Input ID Trns_QTR:YYQ. Val_A Val_B;
  format Trns_Qtr YYQs6.;
  Datalines;
100 2010Q1 1456 345
100 2010Q2 2500 0
100 2010Q3 2500 0
100 2010Q4 2500 0
200 2010Q4 0 1200
200 2011Q1 3100 0
200 2011Q2 3200 0
200 2011Q3 3100 0
200 2011Q4 3300 0
200 2012Q1 2500 12876
300 2009Q4 2390 0
300 2010Q1 4300 0
300 2010Q2 2300 678
300 2010Q3 5600 0
300 2010Q4 5600 0
300 2011Q1 2500 0 
300 2011Q2 3200 0
300 2011Q3 3100 0
300 2011Q4 3300 12876
300 2012Q1 3200 0
300 2012Q1 3100 0
300 2012Q2 3300 0
;
Run;
data one;
 set a;
 by id Val_B notsorted;
 length range $ 20;
 retain first;
 if first.Val_B then do;first=Trns_QTR;call missing(Trns_QTR);count=0; end;
 count+1;
 if last.Val_B and Val_B=0 then do;range=catx('-',put(first,yyq.),put(Trns_QTR,yyq.));output;end;
 keep id range count;
run;
data two;
 set one;
 by id;
 if last.id and scan(range,-1,'-')='2012Q2' then flag=1;
  else flag=0;
run;
proc sql;
 create table three as
  select count(distinct id) as n
   from two
    where count gt 3 and  scan(range,-1,'-') eq '2012Q2';
quit;
Xia Keshan
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
Learn the difference between classical and Bayesian statistical approaches and see a few PROC examples to perform Bayesian analysis in this video.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.