Hi everyone,
I had data like this:
PIN | date_1 | D_VISIT |
3 | 01/30/2013 | |
3 | 06/19/2014 | 6/19/2014 |
3 | 01/28/2015 | 11/12/2015 |
5 | 02/27/2013 | |
5 | 03/14/2014 | 3/14/2014 |
5 | 05/29/2015 | |
5 | 02/05/2016 | |
5 | 02/10/2017 | 2/10/2017 |
6 | 04/15/2013 | |
6 | 04/01/2014 | 4/1/2014 |
6 | 10/05/2015 | |
6 | 06/20/2016 | |
6 | 01/31/2017 | 1/31/2017 |
I hope I could fill in the missing value by the closest date_1
PIN | date_1 | D_VISIT |
3 | 01/30/2013 | 6/19/2014 |
3 | 06/19/2014 | 6/19/2014 |
3 | 01/28/2015 | 11/12/2015 |
5 | 02/27/2013 | 3/14/2014 |
5 | 03/14/2014 | 3/14/2014 |
5 | 05/29/2015 | 3/14/2014 |
5 | 02/05/2016 | 2/10/2017 |
5 | 02/10/2017 | 2/10/2017 |
6 | 04/15/2013 | 4/1/2014 |
6 | 04/01/2014 | 4/1/2014 |
6 | 10/05/2015 | 4/1/2014 |
6 | 06/20/2016 | 1/31/2017 |
6 | 01/31/2017 | 1/31/2017 |
I searched topics and tried to use some codes:
data want;
do _n_ = 1 by 1 until (last.pin);
set original_data;
by pin;
if missing(first_d_visit) and not missing(d_visit) then
first_d_visit = d_visit;
end;
prev_d_visit = first_d_visit;
do _n_ = 1 by 1 until (last.pin);
set original;
by pin;
if missing(d_visit) then
d_visit = prev_d_visit;
prev_d_visit = d_visit;
output;
end;
format first_d_visit mmddyy10. prev_d_visit mmddyy10.;
run;
But didn't give me the closest dates, but the previous dates.
Any suggestion would be really appreciated
data have;
infile cards truncover;
input pin date_1 : mmddyy10. d_visit : mmddyy10. ;
format date_1 d_visit mmddyy10.;
datalines ;
3 1/30/2013
3 6/19/2014 6/19/2014
3 1/28/2015 11/12/2015
5 2/27/2013
5 3/14/2014 3/14/2014
5 5/29/2015
5 2/5/2016
5 2/10/2017 2/10/2017
6 4/15/2013
6 4/1/2014 4/1/2014
6 10/5/2015
6 6/20/2016
6 1/31/2017 1/31/2017
;
run;
proc sql;
create table want as
select a.pin,a.date_1,coalesce(a.d_visit,b.d_visit) as d_visit format=mmddyy10.
from have as a , have as b
where a.pin=b.pin and b.d_visit is not missing
group by a.pin,a.date_1
having abs(a.date_1-b.d_visit)=min(abs(a.date_1-b.d_visit));
quit;
data have;
input in $1-2 date_1 $3-13 d_visit $15-25 ;
datalines ;
3 1/30/2013
3 6/19/2014 6/19/2014
3 1/28/2015 11/12/2015
5 2/27/2013
5 3/14/2014 3/14/2014
5 5/29/2015
5 2/5/2016
5 2/10/2017 2/10/2017
6 4/15/2013
6 4/1/2014 4/1/2014
6 10/5/2015
6 6/20/2016
6 1/31/2017 1/31/2017
;
run;
data d ;
set have ;
by in;
if first.in then col=1;
else col+1 ;
inn=input (in,best.);
run;
%macro all;
proc sql noprint;
select distinct in into :allin separated by ' '
from d;
quit;
%put >> &allin ;
%let count= %sysfunc (countw (&allin));
%put >> &count ;
%do i = 1 %to &count ;
%let subset = %qscan (&allin , &i);
data d2 ;
set d end= eof ;
by inn;
where inn = &subset ;
if last.inn then lim=col;
vis2=d_visit;
if eof then call symput ('last' , compress (put(_n_,8.)));
run;
%put >>> &last ;
data d3 (keep= dt: inn) ;
array dt {&last } $200 ;
do until (last.in );
set d2 ;
by in;
dt (col)= vis2;
end;
run;
data d4 ;
merge d2 (in=a) d3 (in=b) ;
by inn ;
colcal= col-1;
colcal2= col+1;
prevvar= 'dt' || strip (colcal);
nextvar= 'dt' || strip (colcal2);
run;
%macro var;
data d5 ;
%do j= 1 %to &last ;
set d4 ;
if prevvar= "dt&j" then prevvalue= dt&j ;
if nextvar= "dt&j" then nextvalue= dt&j ;
%end;
/* main processing */
if d_visit= '' then do ;
if prevvar='dt0' then d_visit=nextvalue ;
if prevvar ne 'dt0' then do ;
d_visit= COALESCEC (prevvalue, nextvalue);
end;
end;
run;
%mend var;
%var;
data d6_&i ;
set d5 ;
keep in date_1 d_visit inn ;
run;
%end;
%mend all ;
%all ;
data f ;
set d6: ;
run;
proc sort data=f out= want (drop= inn) ; by inn ; run;
Thanks so much for your help. But I have more than 5000 obs, and the drive keeps warning me there is no enough space to save the temporary data.
Have a look at:
data want(keep= pin date_1 d_visit);
array dates[100] _temporary_;
*call missing(of dates[*]);
i = 1;
do _n_ = 1 by 1 until(last.pin);
set work.have;
by pin;
if not missing(d_visit) then do;
dates[i] = d_visit;
i = i + 1;
end;
end;
lastDate = i - 1;
do _n_ = 1 by 1 until(last.pin);
set work.have;
by pin;
if missing(d_visit) then do;
minDiff = 100000;
minId = .;
do i = 1 to lastDate;
d = abs(date_1 - dates[i]);
if d < minDiff then do;
minDiff = d;
minId = i;
end;
end;
d_visit = dates[minId];
end;
output;
end;
run;
You will find one difference to the expected d_visit-value in the third obs for pin=6. I double checked it: the difference between 5h Oct 2015 and 1st Apr 2014 is 552 days, the difference to 31st Jan 2017 only 484 days.
Thanks so much. It's my bad and I haven't made it clear, that I have more than 5000 obs, and when I run the codes, it only generated the first 149 obs output.
ERROR: Array subscript out of range at line 549 column 20.
When I tried to change the array to more than 5000, it still gave me the same ERROR, may I ask how to solve the problem?
Thanks again
@linda0910 wrote:
Thanks so much. It's my bad and I haven't made it clear, that I have more than 5000 obs, and when I run the codes, it only generated the first 149 obs output.
ERROR: Array subscript out of range at line 549 column 20.
When I tried to change the array to more than 5000, it still gave me the same ERROR, may I ask how to solve the problem?
Thanks again
Please post the log with the error using "insert code" - the </> icon.
data have;
infile cards truncover;
input pin date_1 : mmddyy10. d_visit : mmddyy10. ;
format date_1 d_visit mmddyy10.;
datalines ;
3 1/30/2013
3 6/19/2014 6/19/2014
3 1/28/2015 11/12/2015
5 2/27/2013
5 3/14/2014 3/14/2014
5 5/29/2015
5 2/5/2016
5 2/10/2017 2/10/2017
6 4/15/2013
6 4/1/2014 4/1/2014
6 10/5/2015
6 6/20/2016
6 1/31/2017 1/31/2017
;
run;
proc sql;
create table want as
select a.pin,a.date_1,coalesce(a.d_visit,b.d_visit) as d_visit format=mmddyy10.
from have as a , have as b
where a.pin=b.pin and b.d_visit is not missing
group by a.pin,a.date_1
having abs(a.date_1-b.d_visit)=min(abs(a.date_1-b.d_visit));
quit;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.