Hi everyone,
I had data like this:
PIN | date_1 | D_VISIT |
3 | 01/30/2013 | |
3 | 06/19/2014 | 6/19/2014 |
3 | 01/28/2015 | 11/12/2015 |
5 | 02/27/2013 | |
5 | 03/14/2014 | 3/14/2014 |
5 | 05/29/2015 | |
5 | 02/05/2016 | |
5 | 02/10/2017 | 2/10/2017 |
6 | 04/15/2013 | |
6 | 04/01/2014 | 4/1/2014 |
6 | 10/05/2015 | |
6 | 06/20/2016 | |
6 | 01/31/2017 | 1/31/2017 |
I hope I could fill in the missing value by the closest date_1
PIN | date_1 | D_VISIT |
3 | 01/30/2013 | 6/19/2014 |
3 | 06/19/2014 | 6/19/2014 |
3 | 01/28/2015 | 11/12/2015 |
5 | 02/27/2013 | 3/14/2014 |
5 | 03/14/2014 | 3/14/2014 |
5 | 05/29/2015 | 3/14/2014 |
5 | 02/05/2016 | 2/10/2017 |
5 | 02/10/2017 | 2/10/2017 |
6 | 04/15/2013 | 4/1/2014 |
6 | 04/01/2014 | 4/1/2014 |
6 | 10/05/2015 | 4/1/2014 |
6 | 06/20/2016 | 1/31/2017 |
6 | 01/31/2017 | 1/31/2017 |
I searched topics and tried to use some codes:
data want;
do _n_ = 1 by 1 until (last.pin);
set original_data;
by pin;
if missing(first_d_visit) and not missing(d_visit) then
first_d_visit = d_visit;
end;
prev_d_visit = first_d_visit;
do _n_ = 1 by 1 until (last.pin);
set original;
by pin;
if missing(d_visit) then
d_visit = prev_d_visit;
prev_d_visit = d_visit;
output;
end;
format first_d_visit mmddyy10. prev_d_visit mmddyy10.;
run;
But didn't give me the closest dates, but the previous dates.
Any suggestion would be really appreciated
data have;
infile cards truncover;
input pin date_1 : mmddyy10. d_visit : mmddyy10. ;
format date_1 d_visit mmddyy10.;
datalines ;
3 1/30/2013
3 6/19/2014 6/19/2014
3 1/28/2015 11/12/2015
5 2/27/2013
5 3/14/2014 3/14/2014
5 5/29/2015
5 2/5/2016
5 2/10/2017 2/10/2017
6 4/15/2013
6 4/1/2014 4/1/2014
6 10/5/2015
6 6/20/2016
6 1/31/2017 1/31/2017
;
run;
proc sql;
create table want as
select a.pin,a.date_1,coalesce(a.d_visit,b.d_visit) as d_visit format=mmddyy10.
from have as a , have as b
where a.pin=b.pin and b.d_visit is not missing
group by a.pin,a.date_1
having abs(a.date_1-b.d_visit)=min(abs(a.date_1-b.d_visit));
quit;
data have;
input in $1-2 date_1 $3-13 d_visit $15-25 ;
datalines ;
3 1/30/2013
3 6/19/2014 6/19/2014
3 1/28/2015 11/12/2015
5 2/27/2013
5 3/14/2014 3/14/2014
5 5/29/2015
5 2/5/2016
5 2/10/2017 2/10/2017
6 4/15/2013
6 4/1/2014 4/1/2014
6 10/5/2015
6 6/20/2016
6 1/31/2017 1/31/2017
;
run;
data d ;
set have ;
by in;
if first.in then col=1;
else col+1 ;
inn=input (in,best.);
run;
%macro all;
proc sql noprint;
select distinct in into :allin separated by ' '
from d;
quit;
%put >> &allin ;
%let count= %sysfunc (countw (&allin));
%put >> &count ;
%do i = 1 %to &count ;
%let subset = %qscan (&allin , &i);
data d2 ;
set d end= eof ;
by inn;
where inn = &subset ;
if last.inn then lim=col;
vis2=d_visit;
if eof then call symput ('last' , compress (put(_n_,8.)));
run;
%put >>> &last ;
data d3 (keep= dt: inn) ;
array dt {&last } $200 ;
do until (last.in );
set d2 ;
by in;
dt (col)= vis2;
end;
run;
data d4 ;
merge d2 (in=a) d3 (in=b) ;
by inn ;
colcal= col-1;
colcal2= col+1;
prevvar= 'dt' || strip (colcal);
nextvar= 'dt' || strip (colcal2);
run;
%macro var;
data d5 ;
%do j= 1 %to &last ;
set d4 ;
if prevvar= "dt&j" then prevvalue= dt&j ;
if nextvar= "dt&j" then nextvalue= dt&j ;
%end;
/* main processing */
if d_visit= '' then do ;
if prevvar='dt0' then d_visit=nextvalue ;
if prevvar ne 'dt0' then do ;
d_visit= COALESCEC (prevvalue, nextvalue);
end;
end;
run;
%mend var;
%var;
data d6_&i ;
set d5 ;
keep in date_1 d_visit inn ;
run;
%end;
%mend all ;
%all ;
data f ;
set d6: ;
run;
proc sort data=f out= want (drop= inn) ; by inn ; run;
Thanks so much for your help. But I have more than 5000 obs, and the drive keeps warning me there is no enough space to save the temporary data.
Have a look at:
data want(keep= pin date_1 d_visit);
array dates[100] _temporary_;
*call missing(of dates[*]);
i = 1;
do _n_ = 1 by 1 until(last.pin);
set work.have;
by pin;
if not missing(d_visit) then do;
dates[i] = d_visit;
i = i + 1;
end;
end;
lastDate = i - 1;
do _n_ = 1 by 1 until(last.pin);
set work.have;
by pin;
if missing(d_visit) then do;
minDiff = 100000;
minId = .;
do i = 1 to lastDate;
d = abs(date_1 - dates[i]);
if d < minDiff then do;
minDiff = d;
minId = i;
end;
end;
d_visit = dates[minId];
end;
output;
end;
run;
You will find one difference to the expected d_visit-value in the third obs for pin=6. I double checked it: the difference between 5h Oct 2015 and 1st Apr 2014 is 552 days, the difference to 31st Jan 2017 only 484 days.
Thanks so much. It's my bad and I haven't made it clear, that I have more than 5000 obs, and when I run the codes, it only generated the first 149 obs output.
ERROR: Array subscript out of range at line 549 column 20.
When I tried to change the array to more than 5000, it still gave me the same ERROR, may I ask how to solve the problem?
Thanks again
@linda0910 wrote:
Thanks so much. It's my bad and I haven't made it clear, that I have more than 5000 obs, and when I run the codes, it only generated the first 149 obs output.
ERROR: Array subscript out of range at line 549 column 20.
When I tried to change the array to more than 5000, it still gave me the same ERROR, may I ask how to solve the problem?
Thanks again
Please post the log with the error using "insert code" - the </> icon.
data have;
infile cards truncover;
input pin date_1 : mmddyy10. d_visit : mmddyy10. ;
format date_1 d_visit mmddyy10.;
datalines ;
3 1/30/2013
3 6/19/2014 6/19/2014
3 1/28/2015 11/12/2015
5 2/27/2013
5 3/14/2014 3/14/2014
5 5/29/2015
5 2/5/2016
5 2/10/2017 2/10/2017
6 4/15/2013
6 4/1/2014 4/1/2014
6 10/5/2015
6 6/20/2016
6 1/31/2017 1/31/2017
;
run;
proc sql;
create table want as
select a.pin,a.date_1,coalesce(a.d_visit,b.d_visit) as d_visit format=mmddyy10.
from have as a , have as b
where a.pin=b.pin and b.d_visit is not missing
group by a.pin,a.date_1
having abs(a.date_1-b.d_visit)=min(abs(a.date_1-b.d_visit));
quit;
Catch the best of SAS Innovate 2025 — anytime, anywhere. Stream powerful keynotes, real-world demos, and game-changing insights from the world’s leading data and AI minds.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.