DATA Step, Macro, Functions and more

sampling by Category

Accepted Solution Solved
Reply
Contributor
Posts: 28
Accepted Solution

sampling by Category

 

Hello everyone,

I have  100 accounts and for each account I have 3 years worth of monthly observations. Now this is what I am trying to do:

I need to get one  random observation from each account in the second year. can someone share some code or thoughts please?

(I have no idea how to do this)

Thanks!

bill


Accepted Solutions
Solution
‎01-27-2016 04:37 PM
Super User
Posts: 11,343

Re: sampling by Category

Posted in reply to nismail1976

Here's an example that starts from reading a data set.


data have;
   informat account $6. date mmddyy10.;
   format date mmddyy10.;
   input account $ date balance;
datalines;
123456   01/01/2010     7503.96                                                                   
123456   02/01/2010     3209.12                                                                   
123456   03/01/2010     1783.9                                                                    
123456   04/01/2010     9060.33                                                                   
123456   05/01/2010     3571.17                                                                   
123456   06/01/2010     2211.14                                                                   
123456   07/01/2010     7864.38                                                                   
123456   08/01/2010     3980.82                                                                   
123456   09/01/2010     1246.65                                                                   
123456   10/01/2010     1876.86                                                                   
123456   11/01/2010     7761.84                                                                   
123456   12/01/2010     4360.74                                                                   
123456   01/01/2011     9675                                                                      
123456   02/01/2011     2637.03                                                                   
123456   03/01/2011     7139.34                                                                   
123456   04/01/2011     5548.58                                                                   
123456   05/01/2011     5312.5                                                                    
123456   06/01/2011     8613.45                                                                   
123456   07/01/2011     1420.78                                                                   
123456   08/01/2011     8604.25                                                                   
123456   09/01/2011     6530.96                                                                   
123456   10/01/2011     7699.56                                                                   
123456   11/01/2011     7021.56                                                                   
123456   12/01/2011     3287.5                                                                    
123456   01/01/2012     5220.85                                                                   
123456   02/01/2012     9333.98                                                                   
123456   03/01/2012     464.67                                                                    
123456   04/01/2012     6148.54                                                                   
123456   05/01/2012     6575.28                                                                   
123456   06/01/2012     3726.44                                                                   
123456   07/01/2012     5884.72                                                                   
123456   08/01/2012     3122.34                                                                   
123456   09/01/2012     5392.72                                                                   
123456   10/01/2012     2817.66                                                                   
123456   11/01/2012     5282.22                                                                   
123456   12/01/2012     5584.1                                                                    
abcdef   01/01/2011     982.19                                                                    
abcdef   02/01/2011     1087.89                                                                   
abcdef   03/01/2011     5816.67                                                                   
abcdef   04/01/2011     6748.67                                                                   
abcdef   05/01/2011     4827.11                                                                   
abcdef   06/01/2011     1459.01                                                                   
abcdef   07/01/2011     7511.13                                                                   
abcdef   08/01/2011     9609.37                                                                   
abcdef   09/01/2011     7579.12                                                                   
abcdef   10/01/2011     9603.67                                                                   
abcdef   11/01/2011     941.16                                                                    
abcdef   12/01/2011     6773.56                                                                   
abcdef   01/01/2012     8921                                                                      
abcdef   02/01/2012     2841.61                                                                   
abcdef   03/01/2012     6610.11                                                                   
abcdef   04/01/2012     7994.27                                                                   
abcdef   05/01/2012     7926.96                                                                   
abcdef   06/01/2012     3194.98                                                                   
abcdef   07/01/2012     7788.71                                                                   
abcdef   08/01/2012     5450.36                                                                   
abcdef   09/01/2012     7049.72                                                                   
abcdef   10/01/2012     8322.97                                                                   
abcdef   11/01/2012     969.01                                                                    
abcdef   12/01/2012     721.47                                                                    
abcdef   01/01/2013     2530.07                                                                   
abcdef   02/01/2013     2339.33                                                                   
abcdef   03/01/2013     7936.15                                                                   
abcdef   04/01/2013     6852.09                                                                   
abcdef   05/01/2013     2589.46                                                                   
abcdef   06/01/2013     8571.48                                                                   
abcdef   07/01/2013     4732.78                                                                   
abcdef   08/01/2013     6865.38                                                                   
abcdef   09/01/2013     7025.47                                                                   
abcdef   10/01/2013     5461.62                                                                   
abcdef   11/01/2013     754.21                                                                    
abcdef   12/01/2013     6273.52                                                                   
;
run;

/* one of many ways to get one record per account per year*/
proc summary data=have nway;
   class account date;
   format date year4.;
   var balance;
   output out=HaveCount n=;
run;
/* get a value for second year for each account*/
data SecondYear;
   set HaveCount;
   by account;
   retain SecondYearFlag 0;
   if first.account then SecondYearFlag=1;
   else SecondYearFlag+1;
   Year = year(date);
   if SecondYearFlag=2;
   keep account year;
run;
/* get all of the data back for just the second year*/

proc sql;
   create table SecondYearData as
   select have.*
   from secondyear left join have on 
        secondyear.account=have.account and secondyear.year= year(have.date)
   order by have.account, have.date
   ;
quit;

proc surveyselect data=secondyeardata out=want 
     sampsize=1;
     strata account;
run;

View solution in original post


All Replies
Super User
Posts: 11,343

Re: sampling by Category

Posted in reply to nismail1976

It would help to have some data so we can see what you have. Do you have a date associated with each record? Is it a SAS date value?

 

The generic approach would be:

Identify the years associated with each account (which is very easy if there are SAS date values with each record)

Use that info to find the "second year".

Use that to select the appropriate records

Then Proc surveyselect with account as a strata variable and a sample size of 1 for each strata.

Contributor
Posts: 28

Re: sampling by Category

Hi,

yeah , I do have a date value associated with each account but it is not sas date value. here I have included a sample of one account:

 

 


account_data.JPG
Solution
‎01-27-2016 04:37 PM
Super User
Posts: 11,343

Re: sampling by Category

Posted in reply to nismail1976

Here's an example that starts from reading a data set.


data have;
   informat account $6. date mmddyy10.;
   format date mmddyy10.;
   input account $ date balance;
datalines;
123456   01/01/2010     7503.96                                                                   
123456   02/01/2010     3209.12                                                                   
123456   03/01/2010     1783.9                                                                    
123456   04/01/2010     9060.33                                                                   
123456   05/01/2010     3571.17                                                                   
123456   06/01/2010     2211.14                                                                   
123456   07/01/2010     7864.38                                                                   
123456   08/01/2010     3980.82                                                                   
123456   09/01/2010     1246.65                                                                   
123456   10/01/2010     1876.86                                                                   
123456   11/01/2010     7761.84                                                                   
123456   12/01/2010     4360.74                                                                   
123456   01/01/2011     9675                                                                      
123456   02/01/2011     2637.03                                                                   
123456   03/01/2011     7139.34                                                                   
123456   04/01/2011     5548.58                                                                   
123456   05/01/2011     5312.5                                                                    
123456   06/01/2011     8613.45                                                                   
123456   07/01/2011     1420.78                                                                   
123456   08/01/2011     8604.25                                                                   
123456   09/01/2011     6530.96                                                                   
123456   10/01/2011     7699.56                                                                   
123456   11/01/2011     7021.56                                                                   
123456   12/01/2011     3287.5                                                                    
123456   01/01/2012     5220.85                                                                   
123456   02/01/2012     9333.98                                                                   
123456   03/01/2012     464.67                                                                    
123456   04/01/2012     6148.54                                                                   
123456   05/01/2012     6575.28                                                                   
123456   06/01/2012     3726.44                                                                   
123456   07/01/2012     5884.72                                                                   
123456   08/01/2012     3122.34                                                                   
123456   09/01/2012     5392.72                                                                   
123456   10/01/2012     2817.66                                                                   
123456   11/01/2012     5282.22                                                                   
123456   12/01/2012     5584.1                                                                    
abcdef   01/01/2011     982.19                                                                    
abcdef   02/01/2011     1087.89                                                                   
abcdef   03/01/2011     5816.67                                                                   
abcdef   04/01/2011     6748.67                                                                   
abcdef   05/01/2011     4827.11                                                                   
abcdef   06/01/2011     1459.01                                                                   
abcdef   07/01/2011     7511.13                                                                   
abcdef   08/01/2011     9609.37                                                                   
abcdef   09/01/2011     7579.12                                                                   
abcdef   10/01/2011     9603.67                                                                   
abcdef   11/01/2011     941.16                                                                    
abcdef   12/01/2011     6773.56                                                                   
abcdef   01/01/2012     8921                                                                      
abcdef   02/01/2012     2841.61                                                                   
abcdef   03/01/2012     6610.11                                                                   
abcdef   04/01/2012     7994.27                                                                   
abcdef   05/01/2012     7926.96                                                                   
abcdef   06/01/2012     3194.98                                                                   
abcdef   07/01/2012     7788.71                                                                   
abcdef   08/01/2012     5450.36                                                                   
abcdef   09/01/2012     7049.72                                                                   
abcdef   10/01/2012     8322.97                                                                   
abcdef   11/01/2012     969.01                                                                    
abcdef   12/01/2012     721.47                                                                    
abcdef   01/01/2013     2530.07                                                                   
abcdef   02/01/2013     2339.33                                                                   
abcdef   03/01/2013     7936.15                                                                   
abcdef   04/01/2013     6852.09                                                                   
abcdef   05/01/2013     2589.46                                                                   
abcdef   06/01/2013     8571.48                                                                   
abcdef   07/01/2013     4732.78                                                                   
abcdef   08/01/2013     6865.38                                                                   
abcdef   09/01/2013     7025.47                                                                   
abcdef   10/01/2013     5461.62                                                                   
abcdef   11/01/2013     754.21                                                                    
abcdef   12/01/2013     6273.52                                                                   
;
run;

/* one of many ways to get one record per account per year*/
proc summary data=have nway;
   class account date;
   format date year4.;
   var balance;
   output out=HaveCount n=;
run;
/* get a value for second year for each account*/
data SecondYear;
   set HaveCount;
   by account;
   retain SecondYearFlag 0;
   if first.account then SecondYearFlag=1;
   else SecondYearFlag+1;
   Year = year(date);
   if SecondYearFlag=2;
   keep account year;
run;
/* get all of the data back for just the second year*/

proc sql;
   create table SecondYearData as
   select have.*
   from secondyear left join have on 
        secondyear.account=have.account and secondyear.year= year(have.date)
   order by have.account, have.date
   ;
quit;

proc surveyselect data=secondyeardata out=want 
     sampsize=1;
     strata account;
run;
Contributor
Posts: 28

Re: sampling by Category

Thank you so much, you are amazing!

Contributor
Posts: 28

Re: sampling by Category

I really appreciate it!

☑ This topic is solved.

Need further help from the community? Please ask a new question.

Discussion stats
  • 5 replies
  • 267 views
  • 0 likes
  • 2 in conversation