Hello,
I have a dataset that looks like this:
| APP_ID | QUARTER | 
| APP1 | 2019Q1 | 
| APP2 | 2019Q1 | 
| APP3 | 2019Q1 | 
| APP4 | 2019Q1 | 
| APP5 | 2019Q1 | 
| APP6 | 2019Q1 | 
| APP7 | 2019Q1 | 
| APP8 | 2019Q1 | 
| APP9 | 2019Q1 | 
| APP10 | 2019Q1 | 
| APP11 | 2019Q2 | 
| APP12 | 2019Q2 | 
| APP13 | 2019Q2 | 
| APP14 | 2019Q2 | 
| APP15 | 2019Q2 | 
| APP16 | 2019Q2 | 
| APP17 | 2019Q2 | 
| APP18 | 2019Q2 | 
| APP19 | 2019Q2 | 
| APP20 | 2019Q2 | 
| APP21 | 2019Q2 | 
| APP22 | 2019Q2 | 
| APP23 | 2019Q2 | 
| APP24 | 2019Q2 | 
| APP25 | 2019Q2 | 
| APP26 | 2019Q2 | 
| APP27 | 2019Q2 | 
| APP28 | 2019Q3 | 
| APP29 | 2019Q3 | 
| APP30 | 2019Q3 | 
| APP31 | 2019Q4 | 
| APP32 | 2019Q4 | 
| APP33 | |
| APP34 | |
| APP35 | |
| APP36 | |
| APP37 | |
| APP38 | |
| APP39 | |
| APP40 | |
| APP41 | |
| APP42 | |
| APP43 | |
| APP44 | |
| APP45 | |
| APP46 | |
| APP47 | |
| APP48 | 
And the distribution of frequencies:
| QUARTER | COUNT | FREQ | 
| 2019Q1 | 10 | 20,8% | 
| 2019Q2 | 17 | 35,4% | 
| 2019Q3 | 3 | 6,3% | 
| 2019Q4 | 2 | 4,2% | 
| 16 | 33,3% | 
I need to fill missing values with other quarters in such way that distribution of frequencies would be almost uniform (all values occur with almost the same frequency as much as possible).
The number of application can change. As time goes in year, as the number can increase. Now there may appear applications with quarters 2019Q2 or 2019Q3 or missing values, but for example in december they can be application with 2019Q3 or 2019Q4 or missing values.
Missing values can only be filled with quarters from the next year.
The non-missing values cannot be changed.
At this moment it could look like this:
| APP_ID | QUARTER | 
| APP1 | 2019Q1 | 
| APP2 | 2019Q1 | 
| APP3 | 2019Q1 | 
| APP4 | 2019Q1 | 
| APP5 | 2019Q1 | 
| APP6 | 2019Q1 | 
| APP7 | 2019Q1 | 
| APP8 | 2019Q1 | 
| APP9 | 2019Q1 | 
| APP10 | 2019Q1 | 
| APP11 | 2019Q2 | 
| APP12 | 2019Q2 | 
| APP13 | 2019Q2 | 
| APP14 | 2019Q2 | 
| APP15 | 2019Q2 | 
| APP16 | 2019Q2 | 
| APP17 | 2019Q2 | 
| APP18 | 2019Q2 | 
| APP19 | 2019Q2 | 
| APP20 | 2019Q2 | 
| APP21 | 2019Q2 | 
| APP22 | 2019Q2 | 
| APP23 | 2019Q2 | 
| APP24 | 2019Q2 | 
| APP25 | 2019Q2 | 
| APP26 | 2019Q2 | 
| APP27 | 2019Q2 | 
| APP28 | 2019Q3 | 
| APP29 | 2019Q3 | 
| APP30 | 2019Q3 | 
| APP31 | 2019Q4 | 
| APP32 | 2019Q4 | 
| APP33 | 2019Q3 | 
| APP34 | 2019Q3 | 
| APP35 | 2019Q3 | 
| APP36 | 2019Q3 | 
| APP37 | 2019Q3 | 
| APP38 | 2019Q3 | 
| APP39 | 2019Q3 | 
| APP40 | 2019Q3 | 
| APP41 | 2019Q4 | 
| APP42 | 2019Q4 | 
| APP43 | 2019Q4 | 
| APP44 | 2019Q4 | 
| APP45 | 2019Q4 | 
| APP46 | 2019Q4 | 
| APP47 | 2019Q4 | 
| APP48 | 2019Q4 | 
And then distribution of frequencies may be:
| QUARTER | COUNT | FREQ | 
| 2019Q1 | 10 | 20,8% | 
| 2019Q2 | 17 | 35,4% | 
| 2019Q3 | 11 | 22,9% | 
| 2019Q4 | 10 | 20,8% | 
| 0 | 0,0% | 
I will be thankful for any suggestions.
P.S. Sorry for my english.
Hi. There was a flaw in the code of my previous post, but here is a fix. Clumsy in places but it works.
data have;
	input app_id  $8. quarter $6.;
	datalines;
APP1    2019Q1
APP2    2019Q1
APP3    2019Q1
APP4    2019Q1
APP5    2019Q1
APP6    2019Q1
APP7    2019Q1
APP8    2019Q1
APP9    2019Q1
APP10   2019Q1
APP11   2019Q2
APP12   2019Q2
APP13   2019Q2
APP14   2019Q2
APP15   2019Q2
APP16   2019Q2
APP17   2019Q2
APP18   2019Q2
APP19   2019Q2
APP20   2019Q2
APP21   2019Q2
APP22   2019Q2
APP23   2019Q2
APP24   2019Q2
APP25   2019Q2
APP26   2019Q2
APP27   2019Q2
APP28   2019Q3
APP29   2019Q3
APP30   2019Q3
APP31   2019Q4
APP32   2019Q4
APP33   
APP34   
APP35    
APP36    
APP37    
APP38    
APP39    
APP40    
APP41    
APP42    
APP43    
APP44    
APP45    
APP46    
APP47    
APP48    
;
run;
proc sql noprint;
	select count(distinct quarter), count(*) /count(distinct quarter) into :Nqtrs,:target from have;
quit;
proc sql;
	create table temp_a as 
	select quarter, count(*) as N
	from have 
	group by quarter
	having count(*)<=&target or quarter='';
	
	create table temp_b as 
	select quarter, 
		(select sum(N) from temp_a)/(select count (distinct quarter) from temp_a where quarter^='')-N as need , 
		(select N from temp_A where quarter='') as available 
		from temp_a 
		where quarter ^='';
quit;
data temp_c;set temp_b;
retain cumneed;
if _N_=1 then do; 
	cumneed=need; 
	need2=floor(need);
	end;
else do;
	cumneed=need+cumneed;
end;
cumneed2=floor(cumneed);
 need2=cumneed2-lag(cumneed2);
 if need2=. then need2=floor(need);
run;
data temp_d (keep=quarter);
	set temp_c end=eof;
	retain left ;
	if _N_=1 then left=available;
	do i=1 to need2;
		left=left-1;
		output;
	end;
run;
data temp_e;
	set have (where=(quarter=''));
	set temp_d;
run;
data want;
	set have (where=(quarter^='')) temp_e;
run;
*As a check , compare count from the have and want datasets;
proc sql;
	select a.*, b.want from (select h.quarter, count(*) as have from have h group 
		by h.quarter) a left join 
(select w.quarter, count(*) as want from want w group by w.quarter) b on 
		a.quarter=b.quarter;
quit; 
You should post it at Operation Research Forum.and calling @RobPratt
Hi. I've tested this with a few scenarios and it seems to give what you want.
I've edited to remove this code. Updated version is in another post
(Edit - this had errors so I removed the code)
Hi. There was a flaw in the code of my previous post, but here is a fix. Clumsy in places but it works.
data have;
	input app_id  $8. quarter $6.;
	datalines;
APP1    2019Q1
APP2    2019Q1
APP3    2019Q1
APP4    2019Q1
APP5    2019Q1
APP6    2019Q1
APP7    2019Q1
APP8    2019Q1
APP9    2019Q1
APP10   2019Q1
APP11   2019Q2
APP12   2019Q2
APP13   2019Q2
APP14   2019Q2
APP15   2019Q2
APP16   2019Q2
APP17   2019Q2
APP18   2019Q2
APP19   2019Q2
APP20   2019Q2
APP21   2019Q2
APP22   2019Q2
APP23   2019Q2
APP24   2019Q2
APP25   2019Q2
APP26   2019Q2
APP27   2019Q2
APP28   2019Q3
APP29   2019Q3
APP30   2019Q3
APP31   2019Q4
APP32   2019Q4
APP33   
APP34   
APP35    
APP36    
APP37    
APP38    
APP39    
APP40    
APP41    
APP42    
APP43    
APP44    
APP45    
APP46    
APP47    
APP48    
;
run;
proc sql noprint;
	select count(distinct quarter), count(*) /count(distinct quarter) into :Nqtrs,:target from have;
quit;
proc sql;
	create table temp_a as 
	select quarter, count(*) as N
	from have 
	group by quarter
	having count(*)<=&target or quarter='';
	
	create table temp_b as 
	select quarter, 
		(select sum(N) from temp_a)/(select count (distinct quarter) from temp_a where quarter^='')-N as need , 
		(select N from temp_A where quarter='') as available 
		from temp_a 
		where quarter ^='';
quit;
data temp_c;set temp_b;
retain cumneed;
if _N_=1 then do; 
	cumneed=need; 
	need2=floor(need);
	end;
else do;
	cumneed=need+cumneed;
end;
cumneed2=floor(cumneed);
 need2=cumneed2-lag(cumneed2);
 if need2=. then need2=floor(need);
run;
data temp_d (keep=quarter);
	set temp_c end=eof;
	retain left ;
	if _N_=1 then left=available;
	do i=1 to need2;
		left=left-1;
		output;
	end;
run;
data temp_e;
	set have (where=(quarter=''));
	set temp_d;
run;
data want;
	set have (where=(quarter^='')) temp_e;
run;
*As a check , compare count from the have and want datasets;
proc sql;
	select a.*, b.want from (select h.quarter, count(*) as have from have h group 
		by h.quarter) a left join 
(select w.quarter, count(*) as want from want w group by w.quarter) b on 
		a.quarter=b.quarter;
quit; 
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.
