How do I create code for A1 and A2 by combining rows (A5, A6, A7, A8) to get a random sample of 20000 capped records and then have the remaining records fall into (A5, A6, A7, A8) proportionally as seen in Screen Print? Column E shows the final Counts.
data have;
do i=1 to 5442;
id+1;mkt_id='A05';output;
end;
do i=1 to 26268;
id+1;mkt_id='A06';output;
end;
do i=1 to 47267;
id+1;mkt_id='A07';output;
end;
do i=1 to 174980;
id+1;mkt_id='A08';output;
end;
run;
data temp;
set have;
if mkt_id in ('A05' 'A07') then group='A01';
else group='A02';
run;
proc sort data=temp;by group;run;
/*a random sample of 20000*/
proc surveyselect data=temp out=want1 sampsize=(4151 15849) seed=123 outrandom noprint;
strata group;
run;
proc freq data=want1 ;
table group/list;
run;
/* remaining records */
proc sql;
create table want2 as
select * from temp where id not in (select id from want1);
quit;
proc freq data=want2;
table mkt_id/list;
run;
Hello @creece37 and welcome to the SAS Support Communities!
I would use PROC SURVEYSELECT with the option ALLOC=PROP of the STRATA statement:
/* Create example data for demonstration */
data have(drop=i);
do i=1 to 253957;
MKT_ID=put(0A05x+(i>5442)+(i>31710)+(i>78977),hex3.);
output;
end;
run;
/* Show distribution of MKT_ID values */
proc freq data=have;
tables MKT_ID;
run;
/* Draw random sample */
proc surveyselect data=have outall noprint
method=srs n=20000
seed=2718 out=samp;
strata MKT_ID / alloc=prop;
run;
/* Assign final MKT_IDs */
data want(drop=selected total--samplingweight);
set samp;
if selected then substr(MKT_ID,3)=ifc(MKT_ID in ('A05','A07'),'1','2');
run;
/* Check frequencies */
proc freq data=want;
tables MKT_ID;
run;
data have;
do i=1 to 5442;
id+1;mkt_id='A05';output;
end;
do i=1 to 26268;
id+1;mkt_id='A06';output;
end;
do i=1 to 47267;
id+1;mkt_id='A07';output;
end;
do i=1 to 174980;
id+1;mkt_id='A08';output;
end;
run;
data temp;
set have;
if mkt_id in ('A05' 'A07') then group='A01';
else group='A02';
run;
proc sort data=temp;by group;run;
/*a random sample of 20000*/
proc surveyselect data=temp out=want1 sampsize=(4151 15849) seed=123 outrandom noprint;
strata group;
run;
proc freq data=want1 ;
table group/list;
run;
/* remaining records */
proc sql;
create table want2 as
select * from temp where id not in (select id from want1);
quit;
proc freq data=want2;
table mkt_id/list;
run;
Hi,
I think @FreelanceReinh got better solution. His code could give you exact number as you showed in picture.
But mine couldn't .
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.