Re: Create new column based on other column values by Group

ez123 · Posted 10-12-2018 12:07 PM

Want to create new two fields f_no, m_no based on f_id and m_id by Ins_ID. Below are sample input and output. Can someone help or provide the code how do resolve this issue.

Input

Ins_ID	per_id	p_no	f_id	m_id
986789	984731	01
986789	984236	02
986789	435672	03
986789	123484	04	984731	984236
986789	435123	05	984731	984236
986789	654234	06	984731	984236
345754	654456	01
345754	234864	02		654456
345754	934512	03		654456

Output should be

Ins_ID	per_id	p_no	f_no	m_no
986789	984731	01
986789	984236	02
986789	435672	03
986789	123484	04	01	02
986789	435123	05	01	02
986789	654234	06	01	02
345754	654456	01
345754	234864	02		01
345754	934512	03		01

ballardw · Posted 10-12-2018 12:17 PM

@ez123 wrote:

Want to create new two fields f_no, m_no based on f_id and m_id by Ins_ID. Below are sample input and output. Can someone help or provide the code how do resolve this issue.

Input

Ins_ID

per_id

p_no

f_id

m_id

986789

984731

01

986789

984236

02

986789

435672

03

986789

123484

04

984731

984236

986789

435123

05

984731

984236

986789

654234

06

984731

984236

345754

654456

01

345754

234864

02

654456

345754

934512

03

654456

Output should be

Ins_ID

per_id

p_no

f_no

m_no

986789

984731

01

986789

984236

02

986789

435672

03

986789

123484

04

01

02

986789

435123

05

01

02

986789

654234

06

01

02

345754

654456

01

345754

234864

02

01

345754

934512

03

01

What is the actual rule for assigning the values of f_no and m_no? If the f_no or m_no variable changes inside what ever you mean by a group you should also show an example such as the sixth like having a value of f_id= 999999 and what the result would look like.

Also, if you can't create a data step to demonstrate your data then at least paste the example as text. That partial html(?) output format is not useful for attempting to create a data set to work with.

DBailey · Posted 10-12-2018 02:19 PM

not exactly sure what you are trying to do...but this might help...

data work.have;
infile cards truncover;
input Ins_ID per_id p_no $ f_id m_id;
cards;
986789 984731 01
986789 984236 02
986789 435672 03
986789 123484 04 984731 984236
986789 435123 05 984731 984236
986789 654234 06 984731 984236
345754 654456 01
345754 234864 02 654456
345754 934512 03 654456
;
run;

proc sql;
create table want as
select
	t1.ins_id
	,t1.per_id
	,t1.p_no
	,t2.per_id as f_no
	,t3.per_id as m_no
from	
	have t1
	left outer join have t2 on t1.f_id = t2.per_id
	left outer join have t3 on t1.m_id = t3.per_id
;
quit;

singhsahab · Posted 10-12-2018 02:47 PM

Hello EZ,

Based on your output here you can go ..

data have;
input Ins_ID per_id p_no f_id m_id;
datalines;
986789 984731 01 . .
986789 984236 02 . .
986789 435672 03 . .
986789 123484 04 984731 984236
986789 435123 05 984731 984236
986789 654234 06 984731 984236
345754 654456 01 . .
345754 234864 02 . 654456
345754 934512 03 . 654456
;
run;
proc sql ;
CREATE TABLE T1 AS SELECT DISTINCT PER_ID ,P_NO AS P2 FROM (
select a.per_id,a.p_no from have a inner  join have b on a.per_id=b.m_id
UNION ALL
select a.per_id,a.p_no from have a inner  join have b on a.per_id=b.F_id) ORDER BY PER_ID,P_NO ;
quit;

PROC SORT DATA=HAVE;
BY F_ID;
RUN;


DATA WANT1 (KEEP= Ins_ID per_id p_no f_no m_id);
MERGE HAVE(IN=A) T1(IN=B RENAME=(PER_ID=F_ID));
BY F_ID;
IF A AND B THEN f_no=P2;
IF MISSING(PER_ID) THEN DELETE;
RUN;


PROC SORT DATA=WANT1;
BY M_ID;
RUN;


DATA WANT (KEEP= Ins_ID per_id p_no f_no m_NO);
MERGE WANT1(IN=A) T1(IN=B RENAME=(PER_ID=M_ID));
BY M_ID;
IF A AND B THEN M_no=P2;
IF MISSING(PER_ID) THEN DELETE;
RUN;

Kindly note : final output is in sorted order.

Thanks.

ez123 · Posted 10-12-2018 03:05 PM

Thanks for your response...

Due to number of records and fields I don't want to use PROC SQL. I found a way but not sure this efficient way to do this. Anybody have better suggestions?

data temp;
infile DATALINES dsd missover;
input Ins_ID per_id p_no f_id m_id;
CARDS;
986789,984731,01,,
986789,984236,02,,
986789,123484,03,984731,984236
986789,435123,04,984731,984236
986789,654234,05,984731,984236
345754,654456,01,,
345754,234864,02,,654456
345754,934512,03,,654456
run;

proc sort data= temp; by Ins_ID f_id;run;

data f_temp1 (rename=(per_id=f_id p_no=f_no)) m_temp1 (rename=(per_id=m_id p_no=m_no)) ;
set temp (where = (f_id = . and m_id = .));
Keep Ins_ID per_id p_no;
run;

proc sort data= f_temp1; by Ins_ID f_id;run;

proc sort data= m_temp1; by Ins_ID m_id;run;

data out1;
merge temp (in=a) f_temp1 (in=b);
by Ins_ID f_id;
if a then output; run;

data out2;
merge out1 (in=a) m_temp1 (in=b);
by Ins_ID m_id;
if a then output; run;

singhsahab · Posted 10-13-2018 04:46 AM

Great ...

you can create macro to sort and merge datasets to reduce more coding..

Ksharp · Posted 10-13-2018 06:52 AM

data work.have;
infile cards truncover;
input Ins_ID per_id p_no $ f_id m_id;
cards;
986789 984731 01
986789 984236 02
986789 435672 03
986789 123484 04 984731 984236
986789 435123 05 984731 984236
986789 654234 06 984731 984236
345754 654456 01
345754 234864 02 654456
345754 934512 03 654456
;
run;

data want;
 if _n_=1 then do;
  if 0 then set have(rename=(p_no=_p_no)) ;
  declare hash h(dataset:' have(rename=(p_no=_p_no)');
  h.definekey('per_id');
  h.definedata('_p_no');
  h.definedone();
 end;
set have;
if h.find(key:f_id)=0 then f_no=_p_no;
if h.find(key:m_id)=0 then m_no=_p_no;
drop  p_no _p_no ;
run;

Create new column based on other column values by Group