Hi, I have written a program that is clearly commented so by looking at the code and results you should be able to clearly see what the problem is (you can directly copy-paste-run the code) :
/*
One customer can move between different segments.
There can also come new segment generations.
This program:
1. Simulates how a customer table can look at specific
timepoints (snapshots).
2. Then the expected result are printed out in a table.
3. A "from to" table is created to follow the customers
segment belonging at different periods.
4. We see that row 3, 4 and 18 from the resulting table ("actual_result")
differ from the "expected result" at the bottom of the program.
I cannot figure out why this happens?
*/
/*--------------------------------------------------------------*/
/* Creating test data. */
/* Used for creating an expected test result in the next section. */
/*--------------------------------------------------------------*/
/*The customer table at the "beginning". */
data customer_table_snapshot_1;
input date :yymmdd10. customer_id segment_generation_1 $;
format date yymmdd10.;
datalines;
2023-01-01 111 A
2023-01-01 222 B
2023-01-01 333 C
;
run;
/*Customers have changed segment but still within segment_generation_1.*/
data customer_table_snapshot_2;
input date :yymmdd10. customer_id segment_generation_1 $;
format date yymmdd10.;
datalines;
2023-01-07 111 A
2023-01-07 222 C
2023-01-07 333 A
;
run;
/*Now a new column segment_generation_2 has been added. */
data customer_table_snapshot_3;
input date :yymmdd10. customer_id segment_generation_1 $ segment_generation_2 $ ;
format date yymmdd10.;
datalines;
2023-01-10 111 B A
2023-01-10 222 C C
2023-01-10 333 A C
;
run;
/*Now both the segment_generation_1 and segment_generation_2 has changed. */
/*This is so that we can make sure the method can handle that two columns change.*/
data customer_table_snapshot_4;
input date :yymmdd10. customer_id segment_generation_1 $ segment_generation_2 $ ;
format date yymmdd10.;
datalines;
2023-01-15 111 A B
2023-01-15 222 C B
2023-01-15 333 B B
;
run;
data customer_table_snapshot_5;
input date :yymmdd10. customer_id segment_generation_1 $ segment_generation_2 $ ;
format date yymmdd10.;
datalines;
2023-02-20 111 A C
2023-02-20 222 C C
2023-02-20 333 B B
;
run;
/*--------------------------------------------------------------*/
/* Expected result from using test data. */
/*--------------------------------------------------------------*/
data expected_from_to_result;
LENGTH customer_id 8 segment_generation $ 100 segment_belonging $ 1 valid_from_date 8 valid_to_date 8;
input customer_id segment_generation $ segment_belonging $ valid_from_date :yymmdd10. valid_to_date :yymmdd10.;
format valid_from_date yymmdd10. valid_to_date yymmdd10.;
datalines;
111 segment_generation_1 A 2023-01-01 2023-01-09
111 segment_generation_1 B 2023-01-10 2023-01-14
111 segment_generation_1 A 2023-01-15 9999-12-31
111 segment_generation_2 A 2023-01-10 2023-01-14
111 segment_generation_2 B 2023-01-15 2023-02-19
111 segment_generation_2 C 2023-02-20 9999-12-31
222 segment_generation_1 B 2023-01-01 2023-01-06
222 segment_generation_1 C 2023-01-07 9999-12-31
222 segment_generation_2 C 2023-01-10 2023-01-14
222 segment_generation_2 B 2023-01-15 2023-02-19
222 segment_generation_2 C 2023-02-20 9999-12-31
333 segment_generation_1 C 2023-01-01 2023-01-06
333 segment_generation_1 A 2023-01-07 2023-01-14
333 segment_generation_1 B 2023-01-15 9999-12-31
333 segment_generation_2 C 2023-01-10 2023-01-14
333 segment_generation_2 B 2023-01-15 9999-12-31
;
run;
proc sort data=expected_from_to_result;
by customer_id segment_generation valid_from_date;
run;
/*--------------------------------------------------------------*/
/* Creating a from_to table*/
/*--------------------------------------------------------------*/
data historic_info_customers;
LENGTH customer_id 8 segment_generation $ 100 segment_belonging $ 1 valid_from_date 8 valid_to_date 8;
format valid_from_date yymmdd10. valid_to_date yymmdd10.;
stop;
run;
proc sql;
create index idx on historic_info_customers(customer_ID,valid_to_date);
quit;
%macro update_history1(snapshot);
data historic_info_customers;
set &snapshot;
valid_to_date='31dec9999'd;
modify historic_info_customers key=idx;
if not _iorc_ then do;
if segment_belonging ne segment_generation_1 then do;
valid_to_date=date-1;
replace;
valid_from_date=date;
valid_to_date='31dec9999'd;
segment_generation='segment_generation_1';
segment_belonging=segment_generation_1;
output;
end;
end;
else do;
_error_=0;
valid_from_date=date;
valid_to_date='31dec9999'd;
segment_generation='segment_generation_1';
segment_belonging=segment_generation_1;
output;
end;
run;
%mend;
%update_history1(customer_table_snapshot_1);
%update_history1(customer_table_snapshot_2);
%update_history1(customer_table_snapshot_3);
%update_history1(customer_table_snapshot_4);
%update_history1(customer_table_snapshot_5);
%macro update_history2(snapshot);
data historic_info_customers;
set &snapshot;
valid_to_date='31dec9999'd;
modify historic_info_customers key=idx;
if not _iorc_ then do;
if segment_belonging ne segment_generation_2 or segment_generation ne 'segment_generation_2' then do;
valid_to_date=date-1;
replace;
valid_from_date=date;
valid_to_date='31dec9999'd;
segment_generation='segment_generation_2';
segment_belonging=segment_generation_2;
output;
end;
end;
else do;
_error_=0;
valid_from_date=date;
valid_to_date='31dec9999'd;
segment_generation='segment_generation_2';
segment_belonging=segment_generation_2;
output;
end;
run;
%mend;
/*%update_history2(customer_table_snapshot_1); */
/*%update_history2(customer_table_snapshot_2);*/
%update_history2(customer_table_snapshot_3);
%update_history2(customer_table_snapshot_4);
%update_history2(customer_table_snapshot_5);
data actual_result;
set historic_info_customers;
run;
proc sort data=actual_result;
by customer_id segment_generation valid_from_date;
run;
/*--------------------------------------------------------------*/
/* Checking if the expected result is what we get */
/*--------------------------------------------------------------*/
/*comparing */
data expected_result;
set expected_from_to_result;
run;
data actual_result;
set actual_result;
run;
/*Row 3, 8 och 14 differ, WHY?*/
proc compare base=expected_result
compare=actual_result;
run;
Any one know why the result is not as expected?
If you see some possibilities to improve the code that is much appreciated as well.
Thanks in advance.
Is macro update_history2 meant to update rows where segment_generation = 'segment_generation_1' ? Because that's what is happening.
If you only want to update rows from segment generation 2, you could add the segment generation to the index.
proc sql;
create index idx2 on historic_info_customers(customer_ID,segment_generation,valid_to_date);
quit;
%macro update_history2(snapshot);
data historic_info_customers;
set &snapshot;
valid_to_date='31dec9999'd;
segment_generation = 'segment_generation_2';
modify historic_info_customers key=idx2;
if not _iorc_ then do;
if segment_belonging ne segment_generation_2 then do;
valid_to_date=date-1;
replace;
valid_from_date=date;
valid_to_date='31dec9999'd;
segment_generation='segment_generation_2';
segment_belonging=segment_generation_2;
output;
end;
end;
else do;
_error_=0;
valid_from_date=date;
valid_to_date='31dec9999'd;
segment_generation='segment_generation_2';
segment_belonging=segment_generation_2;
output;
end;
run;
%mend;
"Is macro update_history2 meant to update rows where segment_generation = 'segment_generation_1' ?"
No, it is not supposed to do that.
How do you see that that is actually happening?
See code suggestion above.
This works, thanks.
Why does it not work to use the same thing for idx as for idx2?
proc sql;
create index idx on historic_info_customers(customer_ID,segment_generation,valid_to_date);
quit;
@SasStatistics wrote:
This works, thanks.
Why does it not work to use the same thing for idx as for idx2?proc sql; create index idx on historic_info_customers(customer_ID,segment_generation,valid_to_date); quit;
I imagine it could. I just created an idx2 because you had previously created an index with the name idx.
You can generalize your macro for multiple segments if there are going to be segments beyond the first two:
%macro update_historyN(snapshot,segment);
data historic_info_customers;
set &snapshot;
valid_to_date='31dec9999'd;
segment_generation = trim("segment_generation_&segment");
modify historic_info_customers key=idx2;
if not _iorc_ then do;
if segment_belonging ne segment_generation_&segment then do;
valid_to_date=date-1;
replace;
valid_from_date=date;
valid_to_date='31dec9999'd;
segment_generation=trim("segment_generation_&segment");
segment_belonging=segment_generation_&segment;
output;
end;
end;
else do;
_error_=0;
valid_from_date=date;
valid_to_date='31dec9999'd;
segment_generation=trim("segment_generation_&segment");
segment_belonging=segment_generation_&segment;
output;
end;
run;
%mend;
data customer_table_snapshot_6;
input date :yymmdd10. customer_id segment_generation_1 $ segment_generation_2 $ segment_generation_3 $;
format date yymmdd10.;
datalines;
2023-09-27 111 A C D
2023-09-27 222 C C E
2023-09-27 333 B B F
;
run;
data customer_table_snapshot_7;
input date :yymmdd10. customer_id segment_generation_1 $ segment_generation_2 $ segment_generation_3 $;
format date yymmdd10.;
datalines;
2023-10-21 111 A C F
2023-10-21 222 C C E
2023-10-21 333 B B D
;
run;
%update_historyN(customer_table_snapshot_6,3);
%update_historyN(customer_table_snapshot_7,3);
data actual_result;
set historic_info_customers;
run;
proc sort data=actual_result;
by customer_id segment_generation valid_from_date;
run;
@SasStatistics wrote:
How do you see that that is actually happening?
I ran your code by submitting it step by step. When you run the 2nd macro for the first time on snapshot 3, and look at the resulting file, you can see that rows for segment 1 were modified.
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.