BookmarkSubscribeRSS Feed
purpleclothlady
Pyrite | Level 9

Dear all:

I tried to compare old and new dataset , but failed , the desired output is in ATTACHED doc. please kindly suggest. thanks Purple

 


data class;
set sashelp.class end=eof;
if name='Jane' then do ;
    weight=150;
end;
if name='Carol' then do;
     age=19;
end;
output;
if eof then do;	
        name = "Meghan";
        sex = "F";
        age = 15;
        output;
        name = "Joe";
        height = 55 ;
        weight = 88;
        output;	
		name = "Meghan";
        sex = "F";
        age = 15;
        output;
		name = "Jane";
        sex = "F";
		height=44;
        age = 15;
        output;
		name = "John";
        sex = "F";
        age = 12;
				weight=99;

        output;
end;
run;
proc sort;by name;run;

data classnew;
set sashelp.class end=eof;
new_age	 =age;	new_height=height; new_sex=sex;  new_weight =weight;
if name="Mary" then delete;
if name="Alice" then delete;

output;
if eof then do;	
				 
        name = "Meghan";
        sex = "F";
				weight=90	;
        age = 16;
        output;
 
        name = "Meghan";
        sex = "F";
        age = 15;
        output;
 
        name = "Joe";
        height = 55 ;
        weight = 88;
        output;	
		name = "Meghan";
        sex = "F";
        age = 15;
        output;
		name = "Jane";
        sex = "F";
		height=44;
        age = 15;
        output;
		name = "John";
        sex = "F";
        age = 12;
		weight=99;
        output;
		name = "John";
        sex = "F";
        age = 12;
		height=69;
		weight=77;
		OUTPUT;
		name = "Frank";
        sex = "M";
        age = 19;
		height=60;
		weight=70;
		output;
		name="Sara";
		sex="F";
		age=16;
		height=57;
		weight=79;
		output;
end;

run;
 
proc sort;by name;run;

proc print data=class;run;
proc print data=classnew;run;


/*CODE - NOT CORRECT*/
data aa_diff;
			length _check $25.;
  merge  class (in=old)
         classnew(in=new);
	by name;
      if old and new then do;
             if age ~= new_age	 		then do;_check="Revised Record";_check2=1;	end;
             if height^=new_height 		then do;_check="Revised Record";_check2=2;	end;
             if sex^=new_sex 			then do;_check="Revised Record";_check2=3; 	end;
             if weight^=new_weight 		then do;_check="Revised Record";_check2=4;	end;
			end; 
			if old and not new then  _check="Deleted Record";
			if new and not old then  _check="New Record";
run;
			 
ods results off;
ods listing close;
ods escapechar="^";

ods excel file="c:\diiff.xlsx"
		style=excel
		options 
    (row_repeat='header'
		 frozen_headers="ON" 
		 frozen_rowheaders="OFF" 
		 ROWBREAKS_INTERVAL= 'OUTPUT'
		 sheet_label=' '
		 embedded_titles='YES'
		 FitToPage='ON'
		 orientation="landscape"
		 flow="ROWHEADERS");


	ods excel options(autofilter='ALL' sheet_name="diff");
		 
	proc report data=aa_diff nowd split='~' missing
	style(report)=[font_size=9pt rules=all bordercolor=#f0f0f0 borderstyle=solid borderwidth=2pt frame=box] 
	style(column)=[font=(Arial, 9pt)  bordercolor=#f0f0f0 borderstyle=solid borderwidth=2pt cellwidth=0.6in vjust=m] 
	style(header)=[font=(Arial, 9pt) backgroundcolor=#ADD8E6 bordercolor=#f0f0f0 borderstyle=solid borderwidth=1pt font_weight=bold foreground=white vjust=m];

 	column name age sex weight height  _check2;
	define name	/             			order=data style(column)=[cellwidth=0.8in];
	define age  /              			style(column)=[cellwidth=1.1in];
	define sex	/      		order=data style(column)=[cellwidth=1.5in];
 	define weight /  		order=data style(column)=[cellwidth=0.8in];
	define height /  													style(column)=[cellwidth=0.8in]; 
	define _check2   / noprint;
						 
		compute _check;
				if _check="New Record"  then do;
					   call define(_row_,'style','style={background=#90ee90}');
				end;

				else if _check="Deleted Record"  then do;
					   call define(_row_,'style','style={background=#ffcccb}');
				end;

				else if _check="Revised Record"  then do;
						   if _check2=1 then do;
						   		call define('name','style','style={background=yellow}');
								call define ('age','style','style={background=yellow}');
							 end;
							  if _check2=2 then do;
						   		call define('name','style','style={background=yellow}');
								call define ('age','style','style={background=yellow}');
							 end;
							  if _check2=3 then do;
						   		call define('name','style','style={background=yellow}');
								call define ('height','style','style={background=yellow}') ;
							 end;
							  if _check2=4 then do;
						   		call define('Subject','style','style={background=yellow}');
								call define ('weight','style','style={background=yellow}');
							 end;
					end;
		endcomp;
run;

 

10 REPLIES 10
Reeza
Super User
Data steps cannot do a many to many merges, try doing that merge step in SQL instead.

purpleclothlady
Pyrite | Level 9

Hi Reeze:

I tried to use PROC SQL, still not correct at all. 

please help. thanks a lot 

purple

Reeza
Super User
Can you show your SQL Merge code as well then please? No point in starting from scratch.
purpleclothlady
Pyrite | Level 9

Hi Reeze:

What I need to know is :

/*

for dataset: class VS classnew (using classnew as comparing base): proc compare is not working or maybe i only know the basic version. 

**GOAL: 

if there is any deleted records 

if there is any new records

if there is any revised records 

here is the proc sql 

*/


/*RAW DATA*/
data class;
set sashelp.class end=eof;
if name='Jane' then do ;
weight=150;
end;
if name='Carol' then do;
age=19;
end;
output;
if eof then do;
name = "Meghan";
sex = "F";
age = 15;
output;

name = "Joe";
height = 55 ;
weight = 88;
output;
name = "Meghan";
sex = "F";
age = 15;
output;
name = "Jane";
sex = "F";
height=44;
age = 15;
output;
name = "John";
sex = "F";
age = 12;
weight=99;
output;
end;
run;

proc sort;by name;run;

data class2;
set sashelp.class end=eof;
if name="Mary" then delete;
if name="Alice" then delete;

output;
if eof then do;

name = "Meghan";
sex = "F";
weight=90 ;
age = 16;
output;

name = "Meghan";
sex = "F";
age = 15;
output;

name = "Joe";
height = 55 ;
weight = 88;
output;
name = "Meghan";
sex = "F";
age = 15;
output;
name = "Jane";
sex = "F";
height=44;
age = 15;
output;
name = "John";
sex = "F";
age = 12;
weight=110;
output;

name = "John";
sex = "F";
age = 12;
height=69;
weight=77;
OUTPUT;
name = "Frank";
sex = "M";
age = 19;
height=60;
weight=70;
output;
name="Sara";
sex="F";
age=16;
height=57;
weight=79;
output;
end;
run;

data classnew;
set class2;
new_age =age; new_height=height; new_sex=sex; new_weight =weight;
drop age height sex weight;
run;

proc sort;by name;run;


proc print data=class;run;
proc print data=classnew;run;


/*SQL */
proc sql;
create table onlyOLD as
select *, "Deleted" as _check length=20
from class natural left join classnew (keep=name)
where classnew.name is null
order by name;
quit;

proc sql;
create table onlyNEW as
select * ,"New" as _check length=20
from classnew natural left join class (keep=name)
where class.name is null
order by name;
quit;

 

proc sql;
create table both as
select a.*, b.new_age,b.new_sex , b.new_weight,b.new_height,
case
when age ne new_age then 1
when sex ne new_sex then 2
when weight ne new_weight then 3
when height ne new_height then 4
else .
end as _check2

from class as a inner join classnew as b
on a.name=b.name
order by b.name;
quit;

proc sql;
create table aa_diff as
select a.age, a.sex, a.weight, a.height,a._check2, b.*,
case
when a._check2 ne . then "Revised"
end as _check length=20
from both as a natural right join classnew as b
order by classnew.name ;
quit;


/*NEED TO APPEND BOTH CLASS AND CLASSNEW TO GET _CHECK= "NEW" "DELETED" "REVISED"*/
data final;
set onlynew
onlyold
aa_diff;
length _CHECK $20.;
run;

 

proc print data=final;

  * these are difference -- where name in ("John", "Mary" ,"Alice","Morgan","Sara","Frank");
run;

Reeza
Super User

The SQL I included in my previous post gives you the rough data set you indicated in the Word Doc. Take that data set and then apply the rest of your rules as needed is one solution. 

Tom
Super User Tom
Super User

Not having unique keys makes comparison more difficult.

For your example CLASS data you might use NAME and AGE as the keys to try to get to a unique set of keys.

 

Here is a 24+ year old macro from Tom Hoffman you can use.

%macro compds
/*----------------------------------------------------------------------
Conpare two data sets.
----------------------------------------------------------------------*/
(old=      /* Original data set */
,new=      /* Changed data set */
,keys=     /* List of variables that uniquely identify a record */
);

/*----------------------------------------------------------------------
Thanks to Tom Hoffman.
----------------------------------------------------------------------*/
%local lkey;
%let lkey=%scan(&keys,-1);

data
  old
  new
  deleted
  added
  ;
  set
    &old(in=in1)
    &new
  ;
  by &keys;
  if ^(first.&lkey*last.&lkey) then do;
    if (in1) then output old;
    else output new;
  end;
  else if (in1) then output deleted;
  else output added;
run;

proc print data=added;
  title3 "Records in &new but not in &old";
run;

proc print data=deleted;
  title3 "Records in &old but not in &new";
run;

proc compare data=old compare=new;
  id &keys;
  title3 "OLD=&old - NEW=&new";
run;
title3;

%mend compds;
Reeza
Super User
proc sql;
create table aa_diff as
select old.*, new.*

from class as old full join
classnew as new on
old.name=new.name;
quit;

I didn't bother with the CASE statements mostly because you can do that. But...I don't think your _check2 variable makes sense. You're only tracking the last issue. For example if both AGE and WEIGHT are different _check2 will be 4. I think your logic is incorrect there but not sure what you want to do.

And just a note - PROC COMPARE output doesn't do a decent job for you I'm assuming?
Tom
Super User Tom
Super User

What are the rules you are trying to implement?

 

Note: It looks like you are trying to use these two datasets as examples. If you want help in creating code to post to share example data use this macro:  %ds2post() 

data work.class ;
  infile datalines dsd dlm='|' truncover;
  input Name :$8. Sex :$1. Age Height Weight ;
datalines4;
Alfred|M|14|69|112.5
Alice|F|13|56.5|84
Barbara|F|13|65.3|98
Carol|F|19|62.8|102.5
Henry|M|14|63.5|102.5
James|M|12|57.3|83
Jane|F|12|59.8|150
Jane|F|15|44|88
Janet|F|15|62.5|112.5
Jeffrey|M|13|62.5|84
Joe|F|15|55|88
John|M|12|59|99.5
John|F|12|44|99
Joyce|F|11|51.3|50.5
Judy|F|14|64.3|90
Louise|F|12|56.3|77
Mary|F|15|66.5|112
Meghan|F|15|66.5|112
Meghan|F|15|55|88
Philip|M|16|72|150
;;;;

data work.classnew ;
  infile datalines dsd dlm='|' truncover;
  input Name :$8. Sex :$1. Age Height Weight new_age new_height
    new_sex :$1. new_weight
  ;
datalines4;
Alfred|M|14|69|112.5|14|69|M|112.5
Barbara|F|13|65.3|98|13|65.3|F|98
Carol|F|14|62.8|102.5|14|62.8|F|102.5
Frank|M|19|60|70|15|66.5|M|112
Henry|M|14|63.5|102.5|14|63.5|M|102.5
James|M|12|57.3|83|12|57.3|M|83
Jane|F|12|59.8|84.5|12|59.8|F|84.5
Jane|F|15|44|88|15|66.5|M|112
Janet|F|15|62.5|112.5|15|62.5|F|112.5
Jeffrey|M|13|62.5|84|13|62.5|M|84
Joe|F|15|55|88|15|66.5|M|112
John|M|12|59|99.5|12|59|M|99.5
John|F|12|44|99|15|66.5|M|112
John|F|12|69|77|15|66.5|M|112
Joyce|F|11|51.3|50.5|11|51.3|F|50.5
Judy|F|14|64.3|90|14|64.3|F|90
Louise|F|12|56.3|77|12|56.3|F|77
Meghan|F|16|66.5|90|15|66.5|M|112
Meghan|F|15|66.5|90|15|66.5|M|112
Meghan|F|15|55|88|15|66.5|M|112
Philip|M|16|72|150|16|72|M|150
Robert|M|12|64.8|128|12|64.8|M|128
Ronald|M|15|67|133|15|67|M|133
Sara|F|16|57|79|15|66.5|M|112
Thomas|M|11|57.5|85|11|57.5|M|85
William|M|15|66.5|112|15|66.5|M|112
;;;;
purpleclothlady
Pyrite | Level 9
Hi Tom:
thanks for reminder, I will need to learn before use the macro. currently need to solve the issue for job.
purpleclothlady
Pyrite | Level 9

thanks for all the valuable input. there is one solution I found:

when there is many to many merge, we need to find out or derive a unique key for a by var. , I finally found that Unique key var. By doing us, we may be able to Convert a Many to Many as a One to One.

eg.

/*RAW DATA*/
data class;
set sashelp.class end=eof;
if name='Jane' then do ;
    weight=150;
end;
if name='Carol' then do;
     age=19;
end;
output;
if eof then do;	
        name = "Meghan";
        sex = "F";
        age = 15;
        output;
 
        name = "Joe";
        height = 55 ;
        weight = 88;
        output;	
				name = "Meghan";
        sex = "F";
        age = 15;
        output;
				name = "Jane";
        sex = "F";
				height=44;
        age = 15;
        output;
				name = "John";
        sex = "F";
        age = 12;
				weight=99;
        output;
end;
run;
 
proc sort;by name age;run;

data classf;
 set class;
 retain unique;
 by name;
			unique+1;
		 if first.name then unique=1;
run;
	 
proc sort;by name unique;run;


data class2;
set sashelp.class end=eof;
if name="Mary" then delete;
if name="Alice" then delete;

output;
if eof then do;	
				 
        name = "Meghan";
        sex = "F";
				weight=90	;
        age = 16;
        output;
 
        name = "Meghan";
        sex = "F";
        age = 15;
        output;
 
        name = "Joe";
        height = 55 ;
        weight = 88;
        output;	
				name = "Meghan";
        sex = "F";
        age = 15;
        output;
				name = "Jane";
        sex = "F";
				height=44;
        age = 15;
        output;
				name = "John";
        sex = "F";
        age = 12;
				weight=110;
        output;
				
				name = "John";
        sex = "F";
        age = 12;
				height=69;
				weight=77;
				OUTPUT;
				name = "Frank";
        sex = "M";
        age = 19;
				height=60;
				weight=70;
				output;
				name="Sara";
				sex="F";
				age=16;
				height=57;
				weight=79;
				output;
end;
run;

data classnew;
set class2;
		new_age	 =age;	new_height=height; new_sex=sex;  new_weight =weight;
		drop age height sex weight;
run;
	 
proc sort;by name ;run;

data classnewf;
 set classnew;
 retain unique;
 by name;
			unique+1;
		 if first.name then unique=1;
run;

proc sort;by name unique;run;



/*NEED TO APPEND BOTH CLASS AND CLASSNEW TO GET _CHECK= "NEW" "DELETED" "REVISED"*/
data result;
  merge classf	 (in=a)
		  classnewf(in=b );	
	by name unique;
	 if a and not b  	then _ck="Deleted";
	 if b and not a	 	then _ck="New";
	 if (a and b) 		then _ck="Both";
run;

data final;
 set result;
  if _ck in ("New","Deleted") then _ck2=_ck;
   if _ck ="Both" then do;
 			if  (new_age~=age or new_sex~=sex or  new_height~=height or new_weight~=weight) then _ck2="Revised"	;
	end;
run;

/**/

purpleclothlady_0-1643055491389.png

 

hackathon24-white-horiz.png

2025 SAS Hackathon: There is still time!

Good news: We've extended SAS Hackathon registration until Sept. 12, so you still have time to be part of our biggest event yet – our five-year anniversary!

Register Now

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 10 replies
  • 2149 views
  • 3 likes
  • 3 in conversation