Solved: Re: vertical concatenation2

kyath_sas · Posted 07-01-2018 05:04 AM

Hi friends,

Could you please any one solve this

data have;
input sno gno x$ date1 date2;
informat date1 date2 date9.;
format date:date9.;
cards;
1 1 x 03jan1999 06may2010
1 2 y 04jan1999 07may2010
1 3 z 05jan1999 08may2010
2 1 a 03feb2001 06jun2011
2 2 b 04feb2001 07jun2011
2 2 c 05feb2001 08jun2011
2 3 d 06feb2001 09jun2011
;
run;
output like this;
1 xyz 03jan1999 08may2010
2 abd 03feb2001 09jun2011
2 acd 03feb2001 09jun2011

Tom · Posted 07-01-2018 01:32 PM

You might be able to use MERGE to do that, but it depends on the number of replicates for the variables GNO numbers within the SNO numbers. Note that a data step MERGE will match observations one for one when there are multiple input datasets that have replicates for the same BY variable value. So it will work in you case where on GNO=2 repeats. The values for when GNO=1 and GNO=3 will be carried forward onto both records.

data want ;
  merge
    have(where=(gno=1) rename=(x=x1 date1=date1_1 date2=date2_1))
    have(where=(gno=2) rename=(x=x2 date1=date1_2 date2=date2_2))
    have(where=(gno=3) rename=(x=x3 date1=date1_3 date2=date2_3))
  ;
  by sno ;
  length xlist $3 ;
  xlist=cats(of x1-x3);
  start=min(of date:);
  stop =max(of date:);
  drop gno x1-x3 date: ;
  format start stop date9.;
run;

If you could multiple replicates and wanted to take all combinations then use PROC SQL.

proc sql ;
  create table want as 
  select a.sno
       , cats(a.x,b.x,c.x) as xlist length=3
       , min( min(a.date1,b.date1,c.date1) ) as start format=date9.
       , max( max(a.date2,b.date2,c.date2) ) as stop format=date9.
  from 
    have(where=(gno=1)) as a 
  , have(where=(gno=2)) as b
  , have(where=(gno=3)) as c
  where a.sno = b.sno and b.sno=c.sno 
  group by 1,2
  order by 1,2 
  ;
quit;

View solution in original post

Ksharp · Posted 07-01-2018 07:08 AM

If there was at most two same GNO for the same SNO,that would be easy.

 

data have;
input sno gno x$ date1  date2;
informat date1 date2 date9.;
format date:date9.;
cards;
1 1 x 03jan1999 06may2010
1 2 y 04jan1999 07may2010
1 3 z 05jan1999 08may2010
2 1 a 03feb2001 06jun2011
2 2 b 04feb2001 07jun2011
2 2 c 05feb2001 08jun2011
2 3 d 06feb2001 09jun2011
;
run;


data data1 data2;
 set have;
 by sno gno;
 if first.gno then output data1;
  else output data2;
run;
proc sql;
create table temp as
 select * from data1 where sno in (select distinct sno from data2) order by sno,gno;
quit;
data want1;
length list $ 200;
 do until(last.sno);
  set data1;
  by sno;
  if first.sno then first=date1;
  list=cats(list,x);
 end;
 last=date2;
 drop x date1 date2 gno;
 format first last date9.;
run;




data temp1;
 merge temp data2;
 by sno gno;
run;
data want2;
length list $ 200;
 do until(last.sno);
  set temp1;
  by sno;
  if first.sno then first=date1;
  list=cats(list,x);
 end;
 last=date2;
 drop x date1 date2 gno;
 format first last date9.;
run;

data want;
 set want1 want2;
 by sno;
run;

Tom · Posted 07-01-2018 01:32 PM

You might be able to use MERGE to do that, but it depends on the number of replicates for the variables GNO numbers within the SNO numbers. Note that a data step MERGE will match observations one for one when there are multiple input datasets that have replicates for the same BY variable value. So it will work in you case where on GNO=2 repeats. The values for when GNO=1 and GNO=3 will be carried forward onto both records.

data want ;
  merge
    have(where=(gno=1) rename=(x=x1 date1=date1_1 date2=date2_1))
    have(where=(gno=2) rename=(x=x2 date1=date1_2 date2=date2_2))
    have(where=(gno=3) rename=(x=x3 date1=date1_3 date2=date2_3))
  ;
  by sno ;
  length xlist $3 ;
  xlist=cats(of x1-x3);
  start=min(of date:);
  stop =max(of date:);
  drop gno x1-x3 date: ;
  format start stop date9.;
run;

If you could multiple replicates and wanted to take all combinations then use PROC SQL.

proc sql ;
  create table want as 
  select a.sno
       , cats(a.x,b.x,c.x) as xlist length=3
       , min( min(a.date1,b.date1,c.date1) ) as start format=date9.
       , max( max(a.date2,b.date2,c.date2) ) as stop format=date9.
  from 
    have(where=(gno=1)) as a 
  , have(where=(gno=2)) as b
  , have(where=(gno=3)) as c
  where a.sno = b.sno and b.sno=c.sno 
  group by 1,2
  order by 1,2 
  ;
quit;

kyath_sas · Posted 07-02-2018 01:46 PM

Thanks Tom ,

Really nice thanks for help . i checked for huge data ..its working.:-)

kyath_sas · Posted 07-04-2018 04:24 PM

Hi tom,

thanks for your solution .i am very thankful to you .

i tried your code for data which have 1 sno with 1 codenum its working

but some data i have single sno have multiple codenum could you suggest me on this type data ??

could you please suggest me on this..

data have;
input sno seqnum codenum dnum b$;
datalines;
1 1 100 2 a
1 2 100 3 b
1 5 100 3 c
1 6 100 3 d
1 4 200 2 e
1 5 200 2 f
1 6 200 3 g
1 7 200 3 h
2 3 150 2 i
2 5 150 3 j
2 6 150 4 k
2 1 250 2 l
2 2 250 3 m
2 1 300 2 n
3 11 200 2 o
3 12 200 3 p
3 13 200 3 q
3 15 200 4 r
3 10 250 2 s

;
run;

output like this:

1 100 3 a,b
1 100 3 a,c
1 100 3 a,d
1 200 2 e
1 200 3 f,g
1 200 3 f,h
2 150 4 i,j,k
2 250 3 l,m
2 300 2 n
3 200 4 o,p,r
3 200 4 o,q,r
3 250 2 s

here i gave already sorted data based on first 4 variables i need vertical concatenation

4th variable sequence is based on 2nd variable

please see input data i have added all patterns ..i have huge records like this

and in original data 4th variable values are from 2 to20max.

based on 1st 3rd variable grouping

dnum variable seq :

1) 2-3-4-5 seq give 1 line

2) 2-3-3 seq give 2lines 2,3 and 2,3

3) 2-3-4-4-5 seq gives 2lines 2,3,first4,5 and 2,3,second4,5

4) only 2 gives only 1 line

Ksharp · Posted 07-05-2018 10:37 AM

Get rid of the duplicated 2s and run @Tom 's code again.

data have;
input sno seqnum codenum dnum  b$;
datalines;
1 1  100 2 a
1 2  100 3 b
1 5  100 3 c
1 6  100 3 d
1 4  200 2 e
1 5  200 2 f
1 6  200 3 g
1 7  200 3 h
2 3  150 2 i
2 5  150 3 j
2 6  150 4 k
2 1  250 2 l
2 2  250 3 m
2 1  300 2 n
3 11 200 2 o
3 12 200 3 p
3 13 200 3 q
3 15 200 4 r
3 10 250 2 s
;
run;
data temp more;
 set have;
 by sno codenum dnum ;
 if dnum=2 and not last.dnum then output more;
  else output temp;
run;
proc sql ;
  create table want1 as 
  select a.sno,a.codenum,a.dnum as dnum1,b.dnum as dnum2,a.b as b1 ,b.b as b2 
  from 
    temp(where=(dnum=2)) as a 
  left join temp(where=(dnum=3)) as b
  on a.sno = b.sno and a.codenum=b.codenum ;

 create table want2 as
  select a.sno,a.codenum,max(dnum1,dnum2,b.dnum) as dnum
       , cats(b1,b2,b.b) as xlist 
  from 
    want1 as a 
  left join temp(where=(dnum=4)) as b
  on a.sno = b.sno and a.codenum=b.codenum 
  group by 1,2
  order by 1,2 
  ;
quit;

data want;
 set want2 more(keep= sno codenum dnum b rename=(b=xlist));
 by sno codenum dnum ;
run;

novinosrin · Posted 07-01-2018 08:51 PM

data have;
input sno gno x$ date1  date2;
informat date1 date2 date9.;
format date: date9.;
cards;
1 1 x 03jan1999 06may2010
1 2 y 04jan1999 07may2010
1 3 z 05jan1999 08may2010
2 1 a 03feb2001 06jun2011
2 2 b 04feb2001 07jun2011
2 2 c 05feb2001 08jun2011
2 3 d 06feb2001 09jun2011
;
run;

proc transpose data=have out=t;
by sno gno;
var x;
run;

data t2;
set t;
by sno ;
array c(*) col:;
do _n_=1 to dim(c);
if missing(c(_n_)) then c(_n_)=coalescec(of col:);
end;
keep sno col:;
run;

proc transpose data=t2 out=t3;
by sno ;
var col:;
run;

data t4;
set t3;
length want $50;
want=cats(of col:);
keep sno want;
run;

proc sql;
create table want as
select distinct a.sno,want, date1 format=date9., date2 format=date9.
from t4 a, (select sno, min(date1) as date1, max(date2)as date2 from have group by sno)	b
where a.sno=b.sno ;
quit;

Registration is open

SAS Training: Just a Click Away