BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
Rkvi
Fluorite | Level 6

Hi,

 

  I have the attached spreadsheet that needs to be read in and based on how many Patients we have, the output should be in the format mentioned below: 

 

Input spreadsheet format:

 

LabtestsPatient 2Patient 5Patient 9
Test1Test2Test3Test1Test2Test3Test1Test2Test3
Day 101000000.21.85
Day 2121   010
Day 3   010   
Day 4628   259
Day 25   256   

 

Desired output shown for patient_2 and Patient_5

 

Obs

subject

time

test1

test2

test3

1

Patient_2

Day 1

0

1

0

2

Patient_2

Day 2

1

2

1

3

Patient_2

Day 3

 

 

 

4

Patient_2

Day 4

6

2

8

5

Patient_2

Day 25

 

 

 

6

Patient_5

Day 1

0

0

0

7

Patient_5

Day 2

 

 

 

8

Patient_5

Day 3

0

1

0

9

Patient_5

Day 4

 

 

 

10

Patient_5

Day 25

 

 

 

 

Questions: 1)  Know how to import .xlsx data but there's extra information in the spreadsheet which is not needed, so need to filter the data to be imported to be only part of the spreadsheet which is relevant.

 

2) There can be more Patient data on the right side, so how do we find the far right column and then import the data for each patient.

 

Thanks for taking a look at this and appreciate any suggestions on how this could be done efficiently. 

 

 

1 ACCEPTED SOLUTION

Accepted Solutions
Ksharp
Super User

proc import datafile='/folders/myfolders/test.xlsx' dbms=xlsx out=have replace;
 getnames=no;
run;
proc transpose data=have(obs=2 drop=a) out=a(drop=_:);
 var _all_;
run;

data a;
 set a(where=(col2 is not missing));
 length subject $ 80;
 retain subject;
 if not missing(col1) then subject=col1;
 drop col1;
 rename col2=vnames;
run;
proc transpose data=have(obs=0) out=b;
 var _all_;
run;
data label;
 merge b(firstobs=2) a(in=ina);
 if ina;
run;
proc sql noprint;
select _name_ into : names separated by ' ' from label;
select cats(_name_,'="',vnames,'_',subject,'"') into : label separated by ' ' from label;
quit;
proc datasets library=work nodetails nolist;
modify have;
label &label ;
quit;
proc transpose data=have(where=(upcase(a) contains 'DAY')) out=temp1;
by a notsorted;
var &names;
run;
data temp1;
 set temp1;
 id=scan(_label_,1,'_');
 subject=scan(_label_,-1,'_');
run;
proc transpose data=temp1 out=want(drop=_name_);
 by a subject notsorted;
 id id;
 var col1;
run;
proc sort data=want; by subject;run;


View solution in original post

4 REPLIES 4
rogerjdeangelis
Barite | Level 11
SAS Forum: how to import part of spreadsheet i.e. excel data and obtain desired SAS dataset

You might want to check out SAS-L for other foreign file handling tips.


HAVE The following spreadsheet
/*

                   Patient 2                        Patient 5                        Patient 9
 LAB        TEST1      TEST2      TEST3      TEST1      TEST2      TEST3      TEST1      TEST2      TEST3
 DAY 1          4          9          3          9          2          4          2          6          7
 DAY 2          9          1          9          0          4          4          3          0          9
 DAY 3          9          9          8          6          8          4          5          7          2
 DAY 4          8          2          6          8          3          6          5          4          9
 DAY 5          4          7          0          3          7          1          8          3          9

sheet1
*/

WANT

/*
Up to 40 obs WORK.PATLABSRT total obs=45

Obs     DAY        PAT       TEST1    TEST2    TEST3

  1    DAY 1    Patient 2      4        9        3
  2    DAY 1    Patient 2      9        2        4
  3    DAY 1    Patient 2      2        6        7
  4    DAY 2    Patient 2      9        1        9
  5    DAY 2    Patient 2      0        4        4
  6    DAY 2    Patient 2      3        0        9
  7    DAY 3    Patient 2      9        9        8
  8    DAY 3    Patient 2      6        8        4
  9    DAY 3    Patient 2      5        7        2
 10    DAY 4    Patient 2      8        2        6
 11    DAY 4    Patient 2      8        3        6
 12    DAY 4    Patient 2      5        4        9
 13    DAY 5    Patient 2      4        7        0
....
*/

SOLUTION (Does not solve all the wrinkles but that is trivial)

* Here is the working code. It took much more code
to create the input;

libname xls "d:/xls/have.xlsx" header=no scan_text=no;
data patlab(drop=i f2-f10 rename=f1=Day);
  retain pat;
  set xls.'sheet1$A3:Z25'n;
  array pattest[3,3] F2-F10;
  do pat='Patient 2', 'Patient 5', 'Patient 9';
    do i=1 to 3;
       TEST1=pattest[i,1];
       TEST2=pattest[i,2];
       TEST3=pattest[i,3];
       output;
    end;
 end;
;run;quit;
libname xls clear;



COMPLETE SOLUTION (MOST of THE WORK IS CREATING THE EXCEL SHEET)

I have simplified the problem a little but it is easy to get
the non sequential patient numbers using set 'sheet1$A1:Z1'n;

* CREATE THE DATA;
data have(keep=day nam val);
     do day='DAY 1', 'DAY 2', 'DAY 3', 'DAY 4', 'DAY 5';
        do test='Test1', 'Test2', 'Test3';
  do tre='1','2','3';
            val=int(10*uniform(57371));
            nam=cats(tre,test);
            output;
        end;
     end;
  end;
run;quit;


proc transpose data=have out=havxpo;
by day;
id nam;
var val;
run;quit;

* CREATE THE SPREADSHEET;
ods excel file="d:/xls/have.xlsx";
ods excel options(sheet_name="sheet1" sheet_interval="none");
ods
proc report data=havxpo;
cols DAY
 ("Patient 2" _1TEST1    _2TEST1    _3TEST1)
 ("Patient 5" _1TEST2    _2TEST2    _3TEST2)
 ("Patient 9" _1TEST3    _2TEST3    _3TEST3);
define DAY    / "LAB";
define _1TEST1  / "TEST1" ;
define _2TEST1  / "TEST2" ;
define _3TEST1  / "TEST3" ;
define _1TEST2  / "TEST1" ;
define _2TEST2  / "TEST2" ;
define _3TEST2  / "TEST3" ;
define _1TEST3  / "TEST1" ;
define _2TEST3  / "TEST2"  ;
define _3TEST3  / "TEST3";
run;quit;
ods excel close;

/*
Here is the spreadsheet

                   Patient 2                        Patient 5                        Patient 9
 LAB        TEST1      TEST2      TEST3      TEST1      TEST2      TEST3      TEST1      TEST2      TEST3
 DAY 1          4          9          3          9          2          4          2          6          7
 DAY 2          9          1          9          0          4          4          3          0          9
 DAY 3          9          9          8          6          8          4          5          7          2
 DAY 4          8          2          6          8          3          6          5          4          9
 DAY 5          4          7          0          3          7          1          8          3          9

sheet1
*/

* access the sheet and transpose;

libname xls "d:/xls/have.xlsx" header=no scan_text=no;
* this will only get the names that exist;

data patlab(drop=i f2-f10 rename=f1=Day);
  retain pat;
  set xls.'sheet1$A3:Z25'n;
  array pattest[3,3] F2-F10;
  do pat='Patient 2', 'Patient 5', 'Patient 9';
    do i=1 to 3;
       TEST1=pattest[i,1];
       TEST2=pattest[i,2];
       TEST3=pattest[i,3];
       output;
    end;
 end;
;run;quit;
libname xls clear;

* MAKE IT LOOK LIKE THE OPS OUTPUT;

proc sort data=patlab out=patlabsrt;
by pat day;
run;quit;

/*
Up to 40 obs WORK.PATLABSRT total obs=45

Obs     DAY        PAT       TEST1    TEST2    TEST3

  1    DAY 1    Patient 2      4        9        3
  2    DAY 1    Patient 2      9        2        4
  3    DAY 1    Patient 2      2        6        7
  4    DAY 2    Patient 2      9        1        9
  5    DAY 2    Patient 2      0        4        4
  6    DAY 2    Patient 2      3        0        9
  7    DAY 3    Patient 2      9        9        8
  8    DAY 3    Patient 2      6        8        4
  9    DAY 3    Patient 2      5        7        2
 10    DAY 4    Patient 2      8        2        6
 11    DAY 4    Patient 2      8        3        6
 12    DAY 4    Patient 2      5        4        9
 13    DAY 5    Patient 2      4        7        0
....
*/


Ksharp
Super User

proc import datafile='/folders/myfolders/test.xlsx' dbms=xlsx out=have replace;
 getnames=no;
run;
proc transpose data=have(obs=2 drop=a) out=a(drop=_:);
 var _all_;
run;

data a;
 set a(where=(col2 is not missing));
 length subject $ 80;
 retain subject;
 if not missing(col1) then subject=col1;
 drop col1;
 rename col2=vnames;
run;
proc transpose data=have(obs=0) out=b;
 var _all_;
run;
data label;
 merge b(firstobs=2) a(in=ina);
 if ina;
run;
proc sql noprint;
select _name_ into : names separated by ' ' from label;
select cats(_name_,'="',vnames,'_',subject,'"') into : label separated by ' ' from label;
quit;
proc datasets library=work nodetails nolist;
modify have;
label &label ;
quit;
proc transpose data=have(where=(upcase(a) contains 'DAY')) out=temp1;
by a notsorted;
var &names;
run;
data temp1;
 set temp1;
 id=scan(_label_,1,'_');
 subject=scan(_label_,-1,'_');
run;
proc transpose data=temp1 out=want(drop=_name_);
 by a subject notsorted;
 id id;
 var col1;
run;
proc sort data=want; by subject;run;


rogerjdeangelis
Barite | Level 11

Correction to my code

The output should be after the inner loop

 

Nice solution,  I like the capture of Patient number. The only small improvement I would make tto your solution is to convert the

lab values to numeric. The lab values are are character.

 

I have posted code on SAS-L  that will find data ranges, by doing inplace searches of excel. Also first cell last cell..

 

I think using the previous posters technique to get the header and my technique to get the body is a good solution 

 

If you don't like the A3$Z25, you can seach the sheet inplace to find row and column for 'DAY 1'

 

I also posted a 'more flexible' SAS/R techniques on SAS-L.

 

 

* Getting header info;

data sample_code; do until (dne); set xls.'sheet1$A1:Z2'n(rename=(F1-F11=P1-P11)) end=dne; /* SAS code */ output; /* Up to 40 obs WORK.SAMPLE_CODE total obs=7 Obs P1 P2 P3 P4 P5 P6 P7 P8 P9 P10 1 Patient 2 Patient 5 Patient 9 2 LAB TEST1 TEST2 TEST3 TEST1 TEST2 TEST3 TEST1 TEST2 TEST3 */ end; dne=0; do until (dne); set xls.'sheet1$A3:Z25'n; /* SAS code */ output; end; run;quit;

 

 

 

Corrected code

libname xls "d:/xls/have.xlsx" header=no scan_text=no; * this will only get the names that exist; data patlab(drop=i f2-f10 rename=f1=Day); retain pat; set xls.'sheet1$A3:Z25'n; array pattest[3,3] F2-F10; do pat='Patient 2', 'Patient 5', 'Patient 9'; do i=1 to 3; TEST1=pattest[i,1]; TEST2=pattest[i,2]; TEST3=pattest[i,3]; end; output; end; ;run;quit; libname xls clear; Alphabetic List of Variables and Attributes # Variable Type Len Format Informat Label 1 A Char 5 $5. $5. A 2 SUBJECT Char 40 3 TEST1 Char 9 4 TEST2 Char 9 5 TEST3 Char 9

 

Rkvi
Fluorite | Level 6

Thank you both for such a quick response. Its very helpful.

 

Hi rogerjdangelis,  First thank you for giving me the initial tip of multi dimensional array, I tried to loop through after every 3 columns for 3 tests but just went off in a total wrong track of using mod operator and setting array counter. Since I started off by importing data and had some vague idea, after looking your solution I was able to put that missing link and will post my code too.

 

Hi Xia keshan (Ksharp),

 

when I ran into some of your solutions, I save them for later reference and spend couple of days sometimes to dig through the ideas, its just amazing how you write at a much deeper level. Thanks for your solution. I do need to go through it again to get the complete picture 🙂

 

Here's my solution:  

 

* only place where userinput is needed if there is more data is locating 
  the last cell to read the spredsheet data in the RANGE
  here it is J7 for 3 patients data;

PROC IMPORT OUT=WORK.in1
        DATAFILE="U:\sas code\test.xlsx"
          REPLACE;
    GETNAMES = YES; 
    RANGE="sheet1$a1:J7";
    USEDATE=NO;
    SCANTIME=YES;  
RUN;


* obtain all the variable names ie columns in spreadsheet into a dataset cols;
 proc sql noprint;
 create table cols as
   select name 
       from dictionary.columns
    where libname='WORK' 
	      and memname = 'IN1'
	  ;
 quit;

* here we declare 2 macros 
  patnum  = number of patients i.e. column name has patient in it
  lastcol = last associated column for that patient
 ;

 data _null_ ;
   set cols end=last;

   if  lowcase(name) =: 'patient'
    then totpat + 1;

	if last then do;
    call symput('patnum', totpat);
	call symput('lastcol', name) ;
    end;
run;

%put &patnum.;
%put &lastcol.;


* declare a 2 dimensional array 
  first dimension is based on number of patients - macro var patnum
  second dimension is constant 3 since we will always have 3 tests;

data in2;
 set in1 (firstobs=2);
  
   *based on number of patients 2D array ;
   array avars[&patnum., 3] Patient_2 -- &lastcol.;

   * declare test names here ;
   array atests[3] test1 test2 test3;

   * initialize the tests;
   do i = 1 to dim(atests);
     atests[i] = .;
   end;

  *for each patient assign the 3 tests ;
  do i = 1 to &patnum.;
    subject = vname(avars[i,1]);
    atests [1] = avars[i,1];
	atests [2] = avars[i,2];
	atests [3] = avars[i,3];
    day = labtests;
   * for sorting daynum;
    daynum = input(scan(labtests, -1), 2.);
	if not missing(day) then
	output;
  end;

  keep subject day test1 test2 test3 daynum;
run;

* Final dataset sorted by daynum;
 proc sql;
   create table want as
     select subject, day, 
	        test1, test2, test3,
			daynum
	 from in2
	order by subject, daynum;
 quit;

 

    

hackathon24-white-horiz.png

The 2025 SAS Hackathon has begun!

It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.

Latest Updates

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 4 replies
  • 2840 views
  • 1 like
  • 3 in conversation