I need create change log dataset which need compare the records in historical daily snapshot dataset and capture all the changes and put into change log data set. the daily snapshot dataset looks like below:
report date | ID_PART | PART_TYPE | PART_NM | PART_COST |
1/1/2022 | 1A1 | AA | BOLT | $ 3.00 |
1/2/2022 | 1A1 | AA | BOLT | $ 3.00 |
1/3/2022 | 1A1 | AA | BOLT | $ 4.00 |
1/4/2022 | 1A1 | AA | BOLT | $ 4.00 |
1/5/2022 | 1A1 | AA | BOLT | $ 4.00 |
1/6/2022 | 1A1 | BB | BOLT | $ 4.00 |
1/7/2022 | 1A1 | BB | BOLT | $ 4.00 |
1/8/2022 | 1A1 | BB | BOLT | $ 4.00 |
1/9/2022 | 1A1 | BB | BOLT | $ 4.00 |
1/10/2022 | 1A1 | BB | BOLT | $ 4.00 |
1/11/2022 | 1A1 | BB | BOLT | $ 4.00 |
1/12/2022 | 1A1 | CC | BOLT | $ 5.00 |
1/13/2022 | 1A1 | CC | BOLT | $ 5.00 |
1/14/2022 | 1A1 | CC | BOLT | $ 5.00 |
The result I am looking at is something like below:
any suggestions?
THANK YOU!!
Below should work for any number of columns.
data have;
infile datalines truncover dsd;
input report_date :mmddyy. ID_PART $ PART_TYPE $ PART_NM $ PART_COST;
format report_date date9. part_cost dollar16.2;
datalines;
1/01/2022,1A1,AA,BOLT,3.00
1/02/2022,1A1,AA,BOLT,3.00
1/03/2022,1A1,AA,BOLT,4.00
1/04/2022,1A1,AA,BOLT,4.00
1/05/2022,1A1,AA,BOLT,4.00
1/06/2022,1A1,BB,BOLT,4.00
1/07/2022,1A1,BB,BOLT,4.00
1/08/2022,1A1,BB,BOLT,4.00
1/09/2022,1A1,BB,BOLT,4.00
1/10/2022,1A1,BB,BOLT,4.00
1/11/2022,1A1,BB,BOLT,4.00
1/12/2022,1A1,CC,BOLT,5.00
1/13/2022,1A1,CC,BOLT,5.00
1/14/2022,1A1,CC,BOLT,5.00
1/01/2022,1xx,AA,BOLT,3.00
1/02/2022,1xx,AA,BOLT,3.00
1/03/2022,1xx,AA,BOLT,4.00
1/10/2022,1xx,AA,BOLT,4.00
;
/* ensure table is sorted by ID_PART and REPORT_DATE */
proc sort data=have presorted;
by id_part report_date;
run;
%let varlist=;
proc sql noprint;
select name into :varlist separated by ' '
from dictionary.columns
where libname='WORK' and memname='HAVE' and upcase(name) not in ('ID_PART', 'REPORT_DATE')
;
quit;
proc transpose data=have out=inter prefix=_report_date_ ;
by id_part;
id report_date;
var &varlist;
run;
data want(keep=id_part change_date change_field previous_value changed_to);
if 0 then set inter(keep=id_part);
length change_date $6 change_field $32 previous_value changed_to $100;
set inter(rename=(_name_=Change_Field));
by id_part;
array _vals {*} _report_date_:;
do _i=2 to dim(_vals);
if _vals[_i-1] ne _vals[_i] then
do;
change_date=scan(vname(_vals[_i]),-1,'_');
previous_value=_vals[_i-1];
changed_to=_vals[_i];
output;
end;
end;
run;
proc print data=want;
run;
You can check every variable for changes:
if part_cost=lag(part_cost) then...
and make decisions based on that comparison
BY logic work great here in combination with the LAG() function.
data want;
set have;
by id_part part_type part_num part_cost notsorted;
prev_part_type = lag(part_type);
prev_part_num = lag(part_num);
prev_part_cost = lag(part_cost);
if not first.id_part then do;
if part_cost ne prev_part_cost then do;
change_field = "part_cost";
previous_value = prev_part_cost;
changed_to = part_cost;
output;
end;
*add other checks which may be complex;
end;
run;
Another option that comes to mind is to create a copy of the data set incrementing the date by 1 and then using PROC COMPARE but you would have to finagle the results as well. May be simper and an option that could be worth exploring.
Below should work for any number of columns.
data have;
infile datalines truncover dsd;
input report_date :mmddyy. ID_PART $ PART_TYPE $ PART_NM $ PART_COST;
format report_date date9. part_cost dollar16.2;
datalines;
1/01/2022,1A1,AA,BOLT,3.00
1/02/2022,1A1,AA,BOLT,3.00
1/03/2022,1A1,AA,BOLT,4.00
1/04/2022,1A1,AA,BOLT,4.00
1/05/2022,1A1,AA,BOLT,4.00
1/06/2022,1A1,BB,BOLT,4.00
1/07/2022,1A1,BB,BOLT,4.00
1/08/2022,1A1,BB,BOLT,4.00
1/09/2022,1A1,BB,BOLT,4.00
1/10/2022,1A1,BB,BOLT,4.00
1/11/2022,1A1,BB,BOLT,4.00
1/12/2022,1A1,CC,BOLT,5.00
1/13/2022,1A1,CC,BOLT,5.00
1/14/2022,1A1,CC,BOLT,5.00
1/01/2022,1xx,AA,BOLT,3.00
1/02/2022,1xx,AA,BOLT,3.00
1/03/2022,1xx,AA,BOLT,4.00
1/10/2022,1xx,AA,BOLT,4.00
;
/* ensure table is sorted by ID_PART and REPORT_DATE */
proc sort data=have presorted;
by id_part report_date;
run;
%let varlist=;
proc sql noprint;
select name into :varlist separated by ' '
from dictionary.columns
where libname='WORK' and memname='HAVE' and upcase(name) not in ('ID_PART', 'REPORT_DATE')
;
quit;
proc transpose data=have out=inter prefix=_report_date_ ;
by id_part;
id report_date;
var &varlist;
run;
data want(keep=id_part change_date change_field previous_value changed_to);
if 0 then set inter(keep=id_part);
length change_date $6 change_field $32 previous_value changed_to $100;
set inter(rename=(_name_=Change_Field));
by id_part;
array _vals {*} _report_date_:;
do _i=2 to dim(_vals);
if _vals[_i-1] ne _vals[_i] then
do;
change_date=scan(vname(_vals[_i]),-1,'_');
previous_value=_vals[_i-1];
changed_to=_vals[_i];
output;
end;
end;
run;
proc print data=want;
run;
And just adding a small fix for change_date so the value doesn't get truncated ...and also is an actual SAS Date value and not a string.
data have;
infile datalines truncover dsd;
input report_date :mmddyy. ID_PART $ PART_TYPE $ PART_NM $ PART_COST;
format report_date date9. part_cost dollar16.2;
datalines;
1/01/2022,1A1,AA,BOLT,3.00
1/02/2022,1A1,AA,BOLT,3.00
1/03/2022,1A1,AA,BOLT,4.00
1/04/2022,1A1,AA,BOLT,4.00
1/05/2022,1A1,AA,BOLT,4.00
1/06/2022,1A1,BB,BOLT,4.00
1/07/2022,1A1,BB,BOLT,4.00
1/08/2022,1A1,BB,BOLT,4.00
1/09/2022,1A1,BB,BOLT,4.00
1/10/2022,1A1,BB,BOLT,4.00
1/11/2022,1A1,BB,BOLT,4.00
1/12/2022,1A1,CC,BOLT,5.00
1/13/2022,1A1,CC,BOLT,5.00
1/14/2022,1A1,CC,BOLT,5.00
1/01/2022,1xx,AA,BOLT,3.00
1/02/2022,1xx,AA,BOLT,3.00
1/03/2022,1xx,AA,BOLT,4.00
1/10/2022,1xx,AA,BOLT,4.00
;
/* ensure table is sorted by ID_PART and REPORT_DATE */
proc sort data=have presorted;
by id_part report_date;
run;
%let varlist=;
proc sql noprint;
select name into :varlist separated by ' '
from dictionary.columns
where libname='WORK' and memname='HAVE' and upcase(name) not in ('ID_PART', 'REPORT_DATE')
;
quit;
proc transpose data=have out=inter prefix=_report_date_ ;
by id_part;
id report_date;
var &varlist;
run;
data want(keep=id_part change_date change_field previous_value changed_to);
if 0 then set inter(keep=id_part);
length change_date 8 change_field $32 previous_value changed_to $100;
format change_date date9.;
set inter(rename=(_name_=Change_Field));
by id_part;
array _vals {*} _report_date_:;
do _i=2 to dim(_vals);
if _vals[_i-1] ne _vals[_i] then
do;
change_date=input(scan(vname(_vals[_i]),-1,'_'),date9.);
previous_value=_vals[_i-1];
changed_to=_vals[_i];
output;
end;
end;
run;
proc print data=want;
run;
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.