How do I create code for A1 and A2 by combining rows (A5, A6, A7, A8) to get a random sample of 20000 capped records and then have the remaining records fall into (A5, A6, A7, A8) proportionally as seen in Screen Print? Column E shows the final Counts.
data have;
do i=1 to 5442;
id+1;mkt_id='A05';output;
end;
do i=1 to 26268;
id+1;mkt_id='A06';output;
end;
do i=1 to 47267;
id+1;mkt_id='A07';output;
end;
do i=1 to 174980;
id+1;mkt_id='A08';output;
end;
run;
data temp;
set have;
if mkt_id in ('A05' 'A07') then group='A01';
else group='A02';
run;
proc sort data=temp;by group;run;
/*a random sample of 20000*/
proc surveyselect data=temp out=want1 sampsize=(4151 15849) seed=123 outrandom noprint;
strata group;
run;
proc freq data=want1 ;
table group/list;
run;
/* remaining records */
proc sql;
create table want2 as
select * from temp where id not in (select id from want1);
quit;
proc freq data=want2;
table mkt_id/list;
run;
Hello @creece37 and welcome to the SAS Support Communities!
I would use PROC SURVEYSELECT with the option ALLOC=PROP of the STRATA statement:
/* Create example data for demonstration */
data have(drop=i);
do i=1 to 253957;
MKT_ID=put(0A05x+(i>5442)+(i>31710)+(i>78977),hex3.);
output;
end;
run;
/* Show distribution of MKT_ID values */
proc freq data=have;
tables MKT_ID;
run;
/* Draw random sample */
proc surveyselect data=have outall noprint
method=srs n=20000
seed=2718 out=samp;
strata MKT_ID / alloc=prop;
run;
/* Assign final MKT_IDs */
data want(drop=selected total--samplingweight);
set samp;
if selected then substr(MKT_ID,3)=ifc(MKT_ID in ('A05','A07'),'1','2');
run;
/* Check frequencies */
proc freq data=want;
tables MKT_ID;
run;
data have;
do i=1 to 5442;
id+1;mkt_id='A05';output;
end;
do i=1 to 26268;
id+1;mkt_id='A06';output;
end;
do i=1 to 47267;
id+1;mkt_id='A07';output;
end;
do i=1 to 174980;
id+1;mkt_id='A08';output;
end;
run;
data temp;
set have;
if mkt_id in ('A05' 'A07') then group='A01';
else group='A02';
run;
proc sort data=temp;by group;run;
/*a random sample of 20000*/
proc surveyselect data=temp out=want1 sampsize=(4151 15849) seed=123 outrandom noprint;
strata group;
run;
proc freq data=want1 ;
table group/list;
run;
/* remaining records */
proc sql;
create table want2 as
select * from temp where id not in (select id from want1);
quit;
proc freq data=want2;
table mkt_id/list;
run;
Hi,
I think @FreelanceReinh got better solution. His code could give you exact number as you showed in picture.
But mine couldn't .
Good news: We've extended SAS Hackathon registration until Sept. 12, so you still have time to be part of our biggest event yet – our five-year anniversary!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.