Solved: how do i retaining values by id

ger15xxhcker · Posted 07-17-2019 03:06 PM

Hello!

I would like to produce 3 sets under the following conditions. How could this be done?

Thanks!

data test;
 input id amt;
 datalines;
 1 5
 1 5
 1 106
 2 90
 2 100
 3 111
 3 111
 3 111
 3 111
 4 837
 4 838
 4 839
 ;
run;

data want01; /*01 after the first two occurrences, the difference is greater than 20% compared to the third one*/
 input id flg$;
 datalines;
 1 Y
 ;
run;
data want02; /*02 where the amt field is constant*/
 input id flg$;
 datalines;
 3 Y
 ;
run;
data want03; /*03 where the amt field always changeing*/
 input id flg$;
 datalines;
 2 Y
 4 Y
 ;
run;

ScottBass · Posted 07-18-2019 12:30 AM

I'm sure there are data step approaches but I work with RDBMS's so much that I often think in SQL.

If your real datasets are huge this might not perform well.

Hope this helps...

* source data ;
data have;
 input id amt;
 datalines;
 1 5
 1 5
 1 106
 2 90
 2 100
 3 111
 3 111
 3 111
 3 111
 4 837
 4 838
 4 839
 ;
run;

* create two keys - this could be a view if your data is large ;
data have;
   format sk gk;
   sk+1;
   set have;
   by id;
   if first.id then gk=0;
   gk+1;
run;

/*01 after the first two occurrences, the difference is greater than 20% compared to the third one*/
* this is unclear but here is a start ;
proc sql _method;
   * for debugging ;
   create table want1_debug as
   select a.id, a.amt, b.amt as amt_3
   from have a
   left join have b
   on a.id=b.id
   and b.gk=3
   order by a.sk;

   * for real ;
   create table want1 as
   select distinct a.id
   from have a
   left join have b
   on a.id=b.id
   and b.gk=3
   where b.amt > a.amt*1.2;
quit;

/*02 where the amt field is constant*/
proc sql _method;
   * for debugging ;
   create table want2_debug as
   select id,min(amt) as min_amt,max(amt) as max_amt
   from have
   group by id;

   * for real ;
   create table want2 as
   select distinct id 
   from (
   select id,min(amt) as min_amt,max(amt) as max_amt
   from have
   group by id
   ) x
   where x.min_amt = x.max_amt;
quit;

/*03 where the amt field always changeing*/
proc sql _method;
   * for debugging ;
   create table want3_debug as
   select a.id,a.amt,b.amt as next_amt,(a.amt ^= b.amt or b.amt is missing) as flag
   from have a
   left join have b
   on a.id=b.id
   and a.sk+1=b.sk
   order by a.sk;

   * for real ;
   create table want3 as
   select distinct id
   from (
   select id,min(flag) as min_flag 
   from (
   select a.id,a.amt,b.amt as next_amt,(a.amt ^= b.amt or b.amt is missing) as flag
   from have a
   left join have b
   on a.id=b.id
   and a.sk+1=b.sk
   ) x
   group by id
   having min_flag ^= 0
   ) x;
quit;

Please post your question as a self-contained data step in the form of "have" (source) and "want" (desired results).
I won't contribute to your post if I can't cut-and-paste your syntactically correct code into SAS.

View solution in original post

ChrisNZ · Posted 07-17-2019 11:22 PM

What does this mean?

 after the first two occurrences, the difference is greater than 20% compared to the third one

High-Performance SAS Coding - Third Edition

ger15xxhcker · Posted 07-18-2019 01:48 AM

Sorry for the wrong wording. An example:

1. row id =1 amt = 10

2. row id =1 amt = 11

3. row id =1 amt = 13 (this one is 18,12% higher then the second one)

4. row id=2 amt = 10

5. row id=2 amt = 11

6. row id=2 amt = 14 this third one is 27,2% higher then the second one (which is for id = 2)

i'd like to compare this values. If the third one is >= 20% then take a flag.

Thanks for your help!

ScottBass · Posted 07-18-2019 12:30 AM

I'm sure there are data step approaches but I work with RDBMS's so much that I often think in SQL.

If your real datasets are huge this might not perform well.

Hope this helps...

* source data ;
data have;
 input id amt;
 datalines;
 1 5
 1 5
 1 106
 2 90
 2 100
 3 111
 3 111
 3 111
 3 111
 4 837
 4 838
 4 839
 ;
run;

* create two keys - this could be a view if your data is large ;
data have;
   format sk gk;
   sk+1;
   set have;
   by id;
   if first.id then gk=0;
   gk+1;
run;

/*01 after the first two occurrences, the difference is greater than 20% compared to the third one*/
* this is unclear but here is a start ;
proc sql _method;
   * for debugging ;
   create table want1_debug as
   select a.id, a.amt, b.amt as amt_3
   from have a
   left join have b
   on a.id=b.id
   and b.gk=3
   order by a.sk;

   * for real ;
   create table want1 as
   select distinct a.id
   from have a
   left join have b
   on a.id=b.id
   and b.gk=3
   where b.amt > a.amt*1.2;
quit;

/*02 where the amt field is constant*/
proc sql _method;
   * for debugging ;
   create table want2_debug as
   select id,min(amt) as min_amt,max(amt) as max_amt
   from have
   group by id;

   * for real ;
   create table want2 as
   select distinct id 
   from (
   select id,min(amt) as min_amt,max(amt) as max_amt
   from have
   group by id
   ) x
   where x.min_amt = x.max_amt;
quit;

/*03 where the amt field always changeing*/
proc sql _method;
   * for debugging ;
   create table want3_debug as
   select a.id,a.amt,b.amt as next_amt,(a.amt ^= b.amt or b.amt is missing) as flag
   from have a
   left join have b
   on a.id=b.id
   and a.sk+1=b.sk
   order by a.sk;

   * for real ;
   create table want3 as
   select distinct id
   from (
   select id,min(flag) as min_flag 
   from (
   select a.id,a.amt,b.amt as next_amt,(a.amt ^= b.amt or b.amt is missing) as flag
   from have a
   left join have b
   on a.id=b.id
   and a.sk+1=b.sk
   ) x
   group by id
   having min_flag ^= 0
   ) x;
quit;

Please post your question as a self-contained data step in the form of "have" (source) and "want" (desired results).
I won't contribute to your post if I can't cut-and-paste your syntactically correct code into SAS.

ger15xxhcker · Posted 07-18-2019 02:35 AM

Thank, you are right full of name conventions next time i will do so! Your code is perfect for me. Only the first method need some changes. How can i do if I only look at the more then 20% increase, not the decrease. So i modify my question: If the first 2 record is the same and the third one is more then 20 increase.

I really appreciate it!

how do i retaining values by id

Re: how do i retaining values by id

Re: how do i retaining values by id

Re: how do i retaining values by id

Re: how do i retaining values by id

Re: how do i retaining values by id

how do i retaining values by id

Re: how do i retaining values by id

Re: how do i retaining values by id

Re: how do i retaining values by id

Re: how do i retaining values by id

Re: how do i retaining values by id

Registration is open

SAS Training: Just a Click Away