Solved: Re: getting the first non-missing value in a column

daufoi · Posted 12-06-2022 12:06 AM

I have the following data. I sorted it by ID and date. How can I get the first date for each patient but if there is a missing value in the location column, I want the next non-missing value?

data fake_data;
input patID $ date monyy6. location $ outcome ;
format date monyy.;
datalines;
1693 Dec-14 . 1
1693 Nov-14 . 1
1693 Oct-14 CA 1
1693 Sep-14 CA 0
1693 Aug-14 CO 0
1693 Jul-14 CO 0
1129 Feb-18 . 1 
1129 Jan-18 . 0
1129 Dec-17 NY 0
1129 Nov-17 PA 0
1345 Jun-19 . 0 
1345 May-19 . 0 
1345 Apr-19 NY 0 
1345 Mar-19 NY 0
1345 Feb-19 NY 0
1345 Jan-19 CA 0 
1345 Dec-18 CA 0
1345 Nov-18 CA 0
;

I have this so far...

proc sql;
create table test as 
select A.*, 
coalesce(A.location, B.location) AS location_2
from fake_data A, fake_data B
where A.patID = B.patID
group by A.patID
order by A.patID, A.date desc;
quit;

I would like to be able to sort it so that I can choose the first entry which will have all columns with data.

daufoi · Posted 12-06-2022 01:15 AM

I wanted not missing location but all the date and outcome data have to come from the most recent date. I am interested in the data with most recent date and backfilling location. But I found the solution if anyone comes across this...

data fake_data;
input patID $ date monyy6. location $ outcome ;
format date monyy.;
datalines;
1693 Dec-14 . 1
1693 Nov-14 . 1
1693 Oct-14 CA 0
1693 Sep-14 CA 0
1693 Aug-14 CO 0
1693 Jul-14 CO 0
1129 Feb-18 . 1 
1129 Jan-18 . 0
1129 Dec-17 NY 0
1129 Nov-17 PA 0
1345 Jun-19 . 0 
1345 May-19 . 0 
1345 Apr-19 NY 0 
1345 Mar-19 NY 0
1345 Feb-19 NY 0
1345 Jan-19 CA 0 
1345 Dec-18 CA 0
1345 Nov-18 CA 0
;

proc sort data=fake_data; by patID descending date; run;
data fake_row;
  set fake_data;
  rownum=_n_;
run;

proc sql;
create table test as 
select A.*, B.rownum as rownum2,
coalesce(A.location, B.location) AS location_2
from fake_row A, fake_row B
where A.patID = B.patID
group by A.patID
order by A.patID, A.date desc;
quit;

data test2;
	set test;
	where not missing(location_2);
	row = rownum + rownum2;
run;
proc sort data=test2; by patID row; run;

data final;
	set test2;
	by patID row;
	if first.patID;
run;

View solution in original post

andreas_lds · Posted 12-06-2022 01:02 AM

It is not clear what you expect as result.

If you just want the first obs with non-missing location for each id, then try:

data want;
  set fake_data;
  by patID notsorted; /* EDIT: Your data is grouped  by id, but not sorted */
  where not missing(location);
  if first.patID;
run;

daufoi · Posted 12-06-2022 01:15 AM

I wanted not missing location but all the date and outcome data have to come from the most recent date. I am interested in the data with most recent date and backfilling location. But I found the solution if anyone comes across this...

data fake_data;
input patID $ date monyy6. location $ outcome ;
format date monyy.;
datalines;
1693 Dec-14 . 1
1693 Nov-14 . 1
1693 Oct-14 CA 0
1693 Sep-14 CA 0
1693 Aug-14 CO 0
1693 Jul-14 CO 0
1129 Feb-18 . 1 
1129 Jan-18 . 0
1129 Dec-17 NY 0
1129 Nov-17 PA 0
1345 Jun-19 . 0 
1345 May-19 . 0 
1345 Apr-19 NY 0 
1345 Mar-19 NY 0
1345 Feb-19 NY 0
1345 Jan-19 CA 0 
1345 Dec-18 CA 0
1345 Nov-18 CA 0
;

proc sort data=fake_data; by patID descending date; run;
data fake_row;
  set fake_data;
  rownum=_n_;
run;

proc sql;
create table test as 
select A.*, B.rownum as rownum2,
coalesce(A.location, B.location) AS location_2
from fake_row A, fake_row B
where A.patID = B.patID
group by A.patID
order by A.patID, A.date desc;
quit;

data test2;
	set test;
	where not missing(location_2);
	row = rownum + rownum2;
run;
proc sort data=test2; by patID row; run;

data final;
	set test2;
	by patID row;
	if first.patID;
run;

getting the first non-missing value in a column

Re: getting the first non-missing value in a column

Re: getting the first non-missing value in a column

Re: getting the first non-missing value in a column

Catch up on SAS Innovate 2026

SAS Training: Just a Click Away