BookmarkSubscribeRSS Feed
data_null__
Jade | Level 19
> Yeah, your right. It has been running over the
> weekend and it is only the the H's
>
> what can I do differently? maybe I could import and
> combine all the same files then change the variable
> names instead of renaming them in each iteration? Or
> should I stay away from proc import all together?
Yes, forget PROC IMPORT. You mentioned the data had no field-names row, so you are better off writing your own data step to read the data. Plus, you have many files that are similarly structured.

I would just write a data step to read all the files of a particular type. I gave you an example, but it may not have been adequate. You did not provide enough details about the fields for me to do any more than demonstrate one method of reading multiple files.

>
> Originally it was running faster about 8 seconds per
> loop so it should have taken 8 hours. It is running
> much slower now 15 seconds per loop. why is this?

It is hard to guess, but a memory leak seems a reasonable explanation. Or maybe your PC just got tired. 🙂

>
> Let me know if there is a more efficient way of doing
> this. Thank you!
data step.... details to be determined.
deleted_user
Not applicable
I looked over your example but I did not follow it as well so I tried to stick with the import code. I will work on that version now. I do not know how to make the 'cards' option work for the number of files I have. maybe if i run it through a macro and put the &&filename&i at the end of the path.

If you want an idea of exactly what I am trying to do, my code is posted below. It performs the task, but not quickly enough.

You can see that I am making one file per day per location. Would it be more efficient if I began making a yearly data set and delete each daily file after I append it to the end of the yearly data set? That way sas would not have as many data files in the _2005_ library.

I will try to work my way through your code now. Thanks!









~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

libname _2005_ "C:\Users\mherrin\Documents\Horse_Racing\daily2005";



/* Pipe fileref: runs DIR /B so the DATA step below reads one bare file name per record. */
filename indata pipe 'dir C:\Users\mherrin\Documents\Horse_Racing\2005 /b';

/*
 * Build FILE_LIST (one row per file in the directory listing) and publish
 * the listing as macro variables for the import macro:
 *   fname1..fnameN : file name as listed (up to 20 characters)
 *   name1..nameN   : first 11 characters of the file name
 *   num_files      : number of names read (0 when the listing is empty)
 */
data file_list;
   length fname $20;
   /* Seed the counter BEFORE the INPUT statement: when the pipe returns no
      records the step stops at INPUT, and num_files would otherwise never
      be created, leaving the macro loop bound unresolved. */
   if _n_ = 1 then call symputx('num_files', 0);
   infile indata truncover;   /* infile statement for file names */
   input fname $20.;          /* read the file names from the directory */
   /* CALL SYMPUTX strips leading/trailing blanks from the stored value and
      converts numeric arguments without an implicit-conversion note;
      CATS builds the macro variable name without a numeric-to-character note. */
   call symputx(cats('fname', _n_), fname);
   call symputx(cats('name', _n_), substr(fname, 1, 11));
   call symputx('num_files', _n_);   /* last record number = number of files */
run;

%put _global_;

%macro import;
/*
 * Loops over the macro variables fname1 .. fname(num_files) and processes each
 * file according to the last dot-delimited piece of its name:
 *
 *   1    PROC IMPORT (CSV, no header row), rename/drop the VARn columns,
 *        then merge BY RaceNum with the data set named by the first 7
 *        characters of the translated file name; the result is the data set
 *        named by the first 11 characters.
 *   2    PROC IMPORT, rename/drop, delete PostPosition=99 rows, merge BY
 *        racenum prognum with the 11-character data set, write the result to
 *        library _2005_, then delete every data set in WORK.
 *   DRF  PROC IMPORT, rename/drop, then copy to the data set named by the
 *        first 7 characters of the translated file name.
 *
 * Data set names are the file name with each dot translated to an underscore.
 *
 * NOTE(review): the 1 branch reads the 7-character data set that only the DRF
 * branch creates, and the 2 branch reads the 11-character data set that only
 * the 1 branch creates, so the result depends on the order of the file list.
 * Presumably the directory listing yields DRF, then 1, then 2 for each
 * track/day; confirm against the actual file names.
 */
%do i=1 %to &num_files;
/* Clear the log and output windows at the start of every iteration. */
dm 'clear log; clear output;';
/* ---------- files whose extension is 1 ---------- */
%if %scan(&&fname&i,-1,'.')=1 %then %do;
PROC IMPORT OUT= %sysfunc(translate(&&fname&i,_,.))
DATAFILE= "C:\Users\mherrin\Documents\Horse_Racing\2005\&&fname&i"
DBMS=CSV REPLACE;
getnames=no;
RUN;
/* Rewrite the imported data set in place: descriptive names for the kept
   VARn columns, the remaining ones dropped. */
data %sysfunc(translate(&&fname&i,_,.));
set %sysfunc(translate(&&fname&i,_,.))(rename=(var1=Track var2=Date var3=RaceNum var4=DayFlag var5=Dist
var6=DistUnit var7=DistFlag var8=Surf1 var9=Surf2
var11=AllWeather var12=Chute var13=BrisType var14=EQBType var15=Grade
var16=ASRest var17=RRest var18=StateFlag var19=Class var20=Breed
var21=Country var22=Purse var23=TotValue
var28=MaxClmgPrice var30-var34=RC1-RC5
var37=FieldSize var38=TrackCond var39-var43=Frac1-Frac5 var44=FinalTime
var45-var49=FracDist1-FracDist5
var50=OffTime var51=StartCallDist var52-var54=CallDist1-CallDist3 var55=RaceName
var56=StartDesc var57=RailDist var58= OffTurf var59=OTDistChange
var63=Weather var64=RaceTemp var65=WPSPool)
drop=var10 var24-var27 var29 var35 var36 var60-var62 var66-var99);
run;
/* Merge this file with the 7-character data set BY RaceNum; the output is the
   11-character data set.
   NOTE(review): neither input is sorted inside this macro before the BY
   merge; confirm both arrive ordered by RaceNum.
   NOTE(review): names such as Track, Date, Dist, Purse, Surf1 and Surf2 are
   assigned in both this branch and the DRF branch, so the right-hand data set
   supplies those values in the merge; confirm that is intended. */
data %substr(%sysfunc(translate(&&fname&i,_,.)),1,11);
merge %sysfunc(translate(&&fname&i,_,.)) %substr(%sysfunc(translate(&&fname&i,_,.)),1,7);
by RaceNum;
run;
%end;
/* ---------- files whose extension is 2 ---------- */
%if %scan(&&fname&i,-1,'.')=2 %then %do;
PROC IMPORT OUT= %sysfunc(translate(&&fname&i,_,.))
DATAFILE= "C:\Users\mherrin\Documents\Horse_Racing\2005\&&fname&i"
DBMS=CSV REPLACE;
getnames=no;
RUN;
/* Rewrite the imported data set in place with descriptive names; unused
   columns are dropped. */
data %sysfunc(translate(&&fname&i,_,.));
set %sysfunc(translate(&&fname&i,_,.))(rename=(var1=Track var2=Date var3=RaceNum var4=DayFlag var5=HorseName
var6=ForeignBred var7=StateBred var8=PostPosition var9=ProgNum
var10=YoB var11=Breed var12=CoupledFlag var13=AbbJName
var14=JLast var15=JFirst var16=JMiddle
Var18=AbbTName var19=TLast var20=TFirst var21=TMiddle
var22=TripComment var24=OName var25=OFirst var26=OMiddle
var27=ClmgPrice var28=MedCode var29=EquipCode Var30=Earnings
var31=Odds var32=NonBetFlag var33=FavFlag var36=DQFlag
var37=DQPlacing var38=Weight var39=CorWeight var40=OWAmount
var41=Claimed var42=ClmAbbTrn var43=ClmTrnLast var44=ClmTrnFirst
var45=ClmTrnMiddle var47=ClmAbbOwn var48=ClmOwnLast
var49=ClmOwnFirst var50=ClmOwnMiddle var51=WinPO var52=PlacePO
var53=ShowPO var55=StPos var56-var58=CallPos1-CallPos3
var59=StretchPos var60=FinishPos Var61=OfficialPos
var62=StLengthsAhd var63-var65=CallLengthsAhd1-CallLengthsAhd3
var66=StretchLengthsAhd var67=FinishLengthsAhd var68=StLengthsBhd
var69-var71=CallLengthsBhd1-CallLengthsbhd3 var72=StrechLengthsBhd
var73=FinishLengthsBhd var74=StCallMgn var75-var77=CallMgn1-CallMgn3
var78=StretchMgn var79=FinishMgn var80=DeadHeatFlag)
drop=var17 var23 var34 var35 var46 var54 var81-var99);
run;
/* Second pass over the same data set: remove rows with PostPosition equal to 99. */
data %sysfunc(translate(&&fname&i,_,.));
set %sysfunc(translate(&&fname&i,_,.));
if PostPosition=99 then delete;
run;
/* Drop PostPosition and PostPos from the 11-character data set (the output of
   the 1 branch) so that PostPosition in the merge below comes only from this
   file. */
data %substr(%sysfunc(translate(&&fname&i,_,.)),1,11);
set %substr(%sysfunc(translate(&&fname&i,_,.)),1,11)(drop=PostPosition PostPos);
run;
/* Sort both inputs by the merge keys. */
proc sort data=%substr(%sysfunc(translate(&&fname&i,_,.)),1,11);
by racenum prognum;
run;
proc sort data=%sysfunc(translate(&&fname&i,_,.));
by racenum prognum;
run;
/* Combine the 11-character data set with this file, matching on racenum and prognum. */
data %substr(%sysfunc(translate(&&fname&i,_,.)),1,11);
merge %substr(%sysfunc(translate(&&fname&i,_,.)),1,11) %sysfunc(translate(&&fname&i,_,.));
by racenum prognum;
run;
/* Save the merged result in library _2005_, leaving out rows where
   postposition is missing. */
data _2005_.%substr(%sysfunc(translate(&&fname&i,_,.)),1,11);
set %substr(%sysfunc(translate(&&fname&i,_,.)),1,11);
if postposition=. then delete;
run;
/* Delete every data set in WORK before the next file is processed. */
PROC DATASETS LIB=WORK MEMTYPE=DATA
kill;
RUN;


%end;
/* ---------- files whose extension is DRF ---------- */
%if %scan(&&fname&i,-1,'.')=DRF %then %do;
PROC IMPORT OUT= %sysfunc(translate(&&fname&i,_,.))
DATAFILE= "C:\Users\mherrin\Documents\Horse_Racing\2005\&&fname&i"
DBMS=CSV REPLACE;
getnames=no;
RUN;
/* Rewrite the imported data set in place with descriptive names. Numbered
   ranges (for example var102-var113=WODate1-WODate12) rename a run of
   consecutive columns; columns listed in DROP= are discarded. */
data %sysfunc(translate(&&fname&i,_,.));
set %sysfunc(translate(&&fname&i,_,.))(rename=(var1=Track var2=Date var3=RaceNum var4=PostPos var5=Entry
var6=Dist var7=Surface var9=RaceType var10=ASRest
var11=RaceClass var12=Purse var13=ClmgPrice var14=ClmgPriceHorse var15=TrackRecord
var16=RaceCond var17=LasixList var18=ButeList var19=CoupledList var20=MutuelList
var21=SimHT var22=SimHTRaceNum var23=BreedType var24=NasalStpChng var25=AllWeather
var28=Trn var29=TrnStarts var30=TrnWins var31=TrnPlaces var32=TrnShows
var33=Jockey var34=AppWgtAllow var35=JStarts var36=JWins var37=Jplaces var38=JShows
var39=Owner var40=OSilks var41=MainTrackOnly var43=ProgNum var44=MornLine
var45=HorseName var46=Yob var47=FoalMonth var49=Sex Var50=HColor
var51=Weight var52=Sire var53=SSire var54=Dam var55=DSire
var56=Breeder var57=StCountryBred var58=PostPosition
var62=NewMeds var63=OldMeds var64=EquipChng
var65=TdyDistStarts var66=TdyDistWins var67=TdyDistPlaces var68=TdyDistShows var69=TdyDistEarnings
var70=TdyTrackStarts var71=TdyTrackWins var72=TdyTrackPlaces var73=TdyTrackShows var74=TdyTrackEarnings
var75=TurfStarts var76=TurfWins var77=TurfPlaces var78=TurfShows var79=TurfEarnings
var80=WetStarts var81=WetWins var82=WetPlaces var83=WetShows var84=WetEarnings
var85=CurYear var86=CurYearStarts var87=CurYearWins var88=CurYearPlaces var89=CurYearShows var90=CurYearEarnings
var91=PrevYear var92=PrevYearStarts var93=PrevYearWins var94=PrevYearPlaces var95=PrevYearShows var96=PrevYearEarnings
var97=LifeStarts var98=LifeWins var99=LifePlaces var100=LifeShows var101=LifeEarnings

var102-var113=WODate1-WODate12
var114-var125=WOTime1-WOTime12
var126-var137=WOTrack1-WOTrack12
var138-var149=WODist1-WODist12
var150-var161=WOTrackCond1-WOTrackCond12
var162-var173=WODescript1-WODescript12
var174-var185=MITrackIndicator1-MITrackIndicator12
var186-var197=NumWorksDD1-NumWorksDD12
var198-var209=RankDD1-RankDD12

var210=RunStyle var211=QuirinSpdPts var214=f2BrisPacePar var215=f4BrisPacePar var216=f6BrisPacePar
var217=BrisSpeedParClass var218=BrisLatePacePar var224=DaysSinceLR var225-var230=DRFRaceCond1-DRFRaceCond6
var231=AWSLifeStarts var232=AWSLifeWins var233=AWSLifePlaces var234=AWSLifeShows var235=AWSLifeEarnings
var236=AWSBestBrisSpeed var238=LowClmgPrice var239=StateBredFlag
var240-var248=WagerType1-WagerType9
var251=BrisPowerRtng

var256-var265=RaceDate1-RaceDate10
var266-var274=DaysSinceLR1-DaysSinceLR9
var276-var285=Track1-Track10
Var286-var295=BrisTrack1-BrisTrack10
Var296-var305=RaceNum1-RaceNum10
var306-var315=TrackCond1-TrackCond10
var316-var325=Dist1-Dist10
var326-var335=Surf1-Surf10
var336-var345=SpecChute1-SpecChute10
Var346-var355=NumEntrants1-NumEntrants10
var356-var365=PostPosition1-PostPosition10
var366-var375=Equip1-Equip10
Var386-var395=Med1-Med10
Var396-var405=TripComment1-TripComment10
var406-var415=WinName1-WinName10
var416-var425=SecndName1-SecndName10
var426-var435=ThrdName1-ThrdName10
var436-var445=WinWgt1-WinWgt10
var446-var455=SecndWgt1-SecndWgt10
var456-var465=ThrdWgt1-ThrdWgt10
var466-var475=WinMgn1-WinMgn10
var476-var485=SecndMgn1-SecndMgn10
var486-var495=ThrdMgn1-ThrdMgn10
var496-var505=Comment1-Comment10
var506-var515=Wgt1-Wgt10
var516-var525=Odds1-Odds10
var526-var535=Entry1-Entry10
var536-var545=RaceClass1-RaceClass10
var546-var555=ClmgPrice1-ClmgPrice10
var556-var565=Purse1-Purse10
var566-var575=StartCallPos1-StartCallPos10
var576-var585=FrstCallPos1-FrstCallPos10
var586-var595=SecndCallPos1-SecndCallPos10
var596-var605=GateCallPos1-GateCallPos10
var606-var615=StretchPos1-StretchPos10
var616-var625=FinishPos1-FinishPos10
var626-var635=MoneyPos1-MoneyPos10
var636-var645=StartCallLgnbtnLead1-StartCallLgnbtnLead10
var646-var655=StartCallLgnBtn1-StartCallLgnBtn10
var656-var665=FrstCallLgnbtnLead1-FrstCallLgnbtnLead10
var666-var675=FrstCallLgnbtn1-FrstCallLgnbtn10
var676-var685=SecndCallLgnbtnLead1-SecndCallLgnbtnLead10
var686-var695=SecndCallLgnbtn1-SecndCallLgnbtn10
var696-var705=Shape1stCall1-Shape1stCall10
var716-var725=StretchLgnBtnLead1-StretchLgnBtnLead10
var726-var735=StretchLgnBtn1-StretchLgnBtn10
var736-var745=FinishLgnBtnLead1-FinishLgnBtnLead10
var746-var755=FinishLgnBtn1-FinishLgnBtn10
var756-var765=RaceShape2ndCall1-RaceShape2ndCall10
var766-var775=f2Pace1-f2Pace10
var776-var785=f4Pace1-f4Pace10
var786-var795=f6Pace1-f6Pace10
var796-var805=f8Pace1-f8Pace10
var806-var815=f10Pace1-f10Pace10
var816-var825=LatePace1-LatePace10

var846-var855=BrisSpeedRating1-BrisSpeedRating10
var856-var865=DRFSpeedRating1-DRFSpeedRating10
var866-var875=DRFTrackWgt1-DRFTrackWgt10
var876-var885=f2Frac1-f2Frac10
var886-var895=f3Frac1-f3Frac10
var896-var905=f4Frac1-f4Frac10
var906-var915=f5Frac1-f5Frac10
var916-var925=f6Frac1-f6Frac10
var926-var935=f7Frac1-f7Frac10
var936-var945=f8Frac1-f8Frac10
var946-var955=f10Frac1-f10Frac10
var956-var965=f12Frac1-f12Frac10
var966-var975=f14Frac1-f14Frac10
var976-var985=f16Frac1-f16Frac10
var986-var995=Num1Frac1-Num1Frac10
var996-var1005=Num2Frac1-Num2Frac10
var1006-var1015=Num3Frac1-Num3Frac10
var1036-var1045=FinalTime1-FinalTime10
var1046-var1055=Claimed1-Claimed10
var1056-var1065=Trn1-Trn10
var1066-var1075=Jock1-Jock10
var1076-var1085=AppWgtAllow1-AppwgtAllow10
var1086-var1095=RaceType1-RaceType10
var1096-var1105=ASRest1-ASRest10
var1106-var1115=StateBred1-StateBred10
var1116-var1125=QualFlag1-QualFlag10
var1126-var1135=Favorite1-Favorite10
var1136-var1145=FBandage1-FBandage10

var1147=CurYearTrnStarts var1148=CurYearTrnWins var1149=CurYearTrnPlaces var1150=CurYearTrnShows
var1151=CurYearTrnROI var1152=PrevYearTrnStarts var1153=PrevYearTrnWins var1154=PrevYearTrnPlaces
var1155=PrevYearTrnShows var1156=PrevYearTrnROI var1157=CurYearJStarts var1158=CurYearJWins
var1159=CurYearJPlaces var1160=CurYearJShows var1161=CurYearJROI var1162=PrvYearJStarts
var1163=PrevYearJWins var1164=PrevYearJPlaces var1165=PrevYearJShows var1166=PrevYearJROI

var1167-var1176=BrisSpeedParClass1-BrisSpeedParClass10

var1177=CurrSireStudFee
var1178=BestSpFast
var1179=BestSPTurf
var1180=BestSpOffTrack
var1181=BestSpDist
var1182-var1191=BarShoe1-BarShoe10
var1192-var1201=CompLine1-CompLine10
var1202-var1211=LowClmgPrice1-LowClmgPrice10
var1212-var1221=HighClmgPrice1-HighClmgPrice10
var1222=AuctionPrice
var1223=WhereWhenAuction
var1254-var1263=Nasaloff1-Nasaloff10
var1264=DirtPedRtng var1265=MudPedRtng var1266=TurfPedRtng var1267=DistPedRtng var1328=LifeBestSpeed
var1329=FrstRecYrBestSpeed var1330=SecndRecYrBestSpeed var1331=TodayTrackBestSpeed var1332=FDNumStarts var1333=FDNumWins
var1334=FDNumPlaces var1335=FDNumShows var1336=FDNumEarnings var1337=KeyStatNum1 var1338=StartsNum1
var1339=WinPercNum1 var1340=ITMPercNum1 var1341=twodolROINum1 var1342=KeyStatNum2 var1343=StartsNum2 var1344=WinPercNum2
var1345=ITMPercNum2 var1346=twodolROINum2 var1347=KeyStatNum3 var1348=StartsNum3 var1349=WinPercNum3
var1350=ITMPercNum3 var1351=twodolROINum3 var1352=KeyStatNum4 var1353=StartsNum4 var1354=WinPercNum4
var1355=ITMPercNum4 var1356=twodolROINum4 var1357=KeyStatNum5 var1358=StartsNum5 var1359=WinPercNum5
var1360=ITMPercNum5 var1361=twodolROINum5 var1362=KeyStatNum6 var1363=StartsNum6 var1364=WinPercNum6
var1365=ITMPercNum6 var1366=twodolROINum6 var1367=JDistJonTurfLabel var1368=JDistJonTurfstarts
var1369=JDistJonTurfWins var1370=JDistJonTurfPlaces var1371=JDistJonTurfShows var1372=JDistJonTurfROI
var1373=JDistJonTurfEarnings var1374=PostTimesRegion var1383-var1392=ExtStartComment1-ExtStartComment10
var1393-var1402=SealedTrack1-SealedTrack10 var1403-var1412=PrevAWSurfFlag1-PrevAWSurfFlag10)

drop=var8 var26 var27 var42 var48 var59-var61 var212 var213 var219-var223 var237 var249 var250 var252-var255 var275 var376-var385
var706-var715 var826-var845 var1016-var1035 var1146 var1224-var1253 var1268-var1327 var1375-var1382 var1413-var1435);
run;
/* Copy the renamed data to the data set named by the first 7 characters of the
   translated file name; the 1 branch merges against that name. */
data %substr(%sysfunc(translate(&&fname&i,_,.)),1,7);
set %sysfunc(translate(&&fname&i,_,.));
run;
%end;
%end;
%mend import;
%import; Message was edited by: Bryce
data_null__
Jade | Level 19
One thing you want to change is the looping. You want to read all the .1 files in a single data step; that is what my example attempts to demonstrate. Then you can do something similar for .2 and .DRF. When these three steps are done you will have 3 SAS data sets that you can summarize as you see fit.

I will work to make a more specific example. Can you make some of the files available online somehow?

Also, if you can, copy the PROC IMPORT generated data step code from a SAS log. This will be the same code we need to put in the data step we write that reads all the files of type .1
deleted_user
Not applicable
Wow!!!

Where do I start with that?

Firstly you will need to read the files using the infile and input statements like this:

data yourdata;
infile xxxxxx dsd;
input variableA variableB etc etc

I cant explain how all that works here I would point you in the direction of the SAS documentation.

Secondly, use as few data steps as possible. Every time you use a DATA statement with a SET statement, SAS has to read through every record in the data set, which uses up resources. Do as much manipulation as you can in one data step, i.e. the one in which you read the files. So do your deletes, renames and everything else in there. Doing a data step simply to rename variables is a complete waste of resources and will slow you down.

Only keep the variables you are interested in. If, as I can see in one of them, you are not interested in the last 10 variables, just don't bother reading them in from the file (I appreciate that import will do this anyway, but when you use the infile and input statement, just read in what you need).

Also the amount of data in your _2005_ library will have no effect on how quickly it runs, you only need to worry about clearing that out if you are going to run out of space.

As I say, the imperative thing is to keep your data steps to a minimum. Think of data sets as books: each data step is like going through every page. If you were going to sign your name on every page of a book and put a date at the top, you wouldn't run through every page signing your name and then go back to the start to put the date on every page; you'd be much faster putting both the date and the signature on each page at the same time. Hope that analogy isn't too abstract.
deleted_user
Not applicable
I wanted to walk through this, I am commenting it so maybe you can direct me in the things I do not understand.


dm 'clear log; clear output;';
/*
 * Create test data (files to be read in the next step).
 *
 * Reads the file paths listed after CARDS. On the first record it removes and
 * recreates the directory tree through two piped shell commands; then, for
 * every listed path, it writes a small comma-delimited file of six records.
 *
 * All comments are block comments on purpose: a statement-style comment
 * (asterisk ... semicolon) runs until the next semicolon and is scanned for
 * quotes, so a missing semicolon or an apostrophe inside one silently
 * swallows the code that follows.
 */
data _null_;
   length command $256;    /* command holds the shell command text handed to the pipe */
   infile cards truncover; /* read the in-stream lines that follow the CARDS statement */

   /* What it reads in will be called 'name'.
      Q: What does the ':' do?
      A: The colon modifier applies the $128. informat in list-input mode: the
         value is read up to the next delimiter instead of a fixed 128 columns. */
   input name :$128.;

   if _n_ eq 1 then do;
      /* Define the path name: everything up to the last backslash, quoted. */
      path = quote(substr(name,1,find(name,'\',-length(name))));

      /* Q: don't understand this step.
         A: This is an operating-system command (remove the .\xUsers tree); the
            INFILE ... PIPE statement below is what runs it. */
      command = 'rmdir /S /Q ".\xUsers\"';
      infile dummy1 pipe filevar=command;

      /* Recreate the directory designated in path. */
      command=catx(' ','mkdir',path);
      put command=;

      /* Q: What exactly does filevar= do?
         A: FILEVAR= names a variable whose current value is what gets opened
            (here, with PIPE, the command to run), so the target does not have
            to be hard coded. */
      infile dummy1 pipe filevar=command;
      rc = sleep(1);
   end;

   /* Write six records (zero-padded counter, path) to the file named in NAME. */
   file dummy filevar=name dsd dlm=',';
   do _n_ = 1 to 6;
      put _n_:z5. name;
   end;

/* Q: How do I get ALL the files I need here?
   A: These lines are only test input; in the real program read the names from
      a piped directory listing instead of CARDS. */
cards;
.\xUsers\mherrin\Documents\trial\APX05132005.1
.\xUsers\mherrin\Documents\trial\APX05132005.2
.\xUsers\mherrin\Documents\trial\APX0513.DRF
.\xUsers\mherrin\Documents\trial\APX05142005.1
.\xUsers\mherrin\Documents\trial\APX05142005.2
.\xUsers\mherrin\Documents\trial\APX0514.DRF
;;;;
run;

*Read Type .1 (RACE) files;

data type1;
   /*
    * Reads every file whose last dot-delimited piece is 1 from the directory
    * listing and stacks their records into TYPE1, tagging each record with
    * the file name, a 3-character prefix and the date parsed from the name.
    */
   length command filename fname path $256.;

   /* Q: dir has not been defined, how does this work?
      A: dir is not a SAS name; it is the shell command text that the piped
         INFILE below executes, one output line per record. */
   command = 'dir /s /b ".\xUsers"';
   infile dummy1 pipe filevar=command truncover;
   input path $256.;

   /* Subsetting IF: continue only for paths ending in .1 */
   if scan(path, -1, '.') = '1';

   /* Q: ???
      A: PUT _INFILE_ writes the record just read (the listing line) to the log. */
   put _infile_;

   /* Open the file named in PATH; FNAME receives its name, EOF flags the last record. */
   infile dummy2 filevar=path filename=fname end=eof dsd;

   /* Q: fname does not exist in this data set at the end?
      A: Variables named in INFILE options such as FILENAME=, FILEVAR= and END=
         are not written to the output data set; FILENAME below keeps the value. */
   filename = scan(fname, -2, '.\');

   length prefix $3.;
   prefix = filename;   /* assumes prefix is always the first 3 characters */
   date = input(substr(filename, anydigit(filename)), mmddyy10.);

   /* Read every record of the current file. Q: Fields for type 1 files?
      A: This INPUT statement is where the real field list for the file type goes. */
   do while (eof = 0);
      input obs name :$64.;
      output;
   end;

   format date date9.;
run;



In the end I get a data file with the variables you defined: filename, prefix, date, ... I know you wrote above to put in the field types for the data; do I need to first declare them in the length statement above? It does not seem to be reading any information from the file itself, only the file name so far.

Thanks.
deleted_user
Not applicable
Thanks for the quick replies!!! I will see what I can make happen as you can see this is over my knowledge. But you have to learn somehow.

If you would like to email me at mherrin@g.clemson.edu I can send you a few copies of the files to test your code on. Thank you both for your help.
deleted_user
Not applicable
The code produced for the proc import is too many lines to put here. I can email this as well. Message was edited by: Bryce
deleted_user
Not applicable
Hi Bryce, firstly the cards statement simply allows SAS to read in some test data from within the program. As data _null_ does not have your files, he created some test data using the cards statement. In your program the word 'cards' would be replaced by the name of your pipe fileref, so your values would not come from cards but from what is actually returned by the command. I have stuck some comments in, in CAPS.

It's been a while since I used proc import (as the site I am on does not have access to CSVs), but if I recall, it will create SAS code and put that in the log. The SAS code will use the infile/input way of reading in the data. Copy and paste that code into your program to get a good starting point. I may be wrong and maybe it doesn't put the code in the log; you'll have to correct me on that one.

I wanted to walk through this, I am commenting it so maybe you can direct me in the things I do not understand.


dm 'clear log; clear output;';
* create test data(files to be read in next step);
data _null_;
length command $256; *command will be the location of the files;
infile cards truncover; *tells SAS to read through the cards later in the program;
input name :$128.; *what it reads in will be called 'name' What does the ':' do? THE DOT IS JUST PART OF THE FORMAT NAME, TELLING SAS THAT IT SHOULD EXPECT THE VALUES TO BE 128 BYTES OF CHARACTER (OR LESS, THATS WHAT THE : TELLS IT)
if _n_ eq 1 then do;
path = quote(substr(name,1,find(name,'\',-length(name)))); *define the path name;
command = 'rmdir /S /Q ".\xUsers\"'; *don't understand this step; THIS IS THE COMMAND TO THE DOS PROMPT/PIPE DEVICE;
infile dummy1 pipe filevar=command; *read all of the files located in the command location (or open a pipe to that location);
command=catx(' ','mkdir',path); *creates this directory with the location designated in path;
put command=;
infile dummy1 pipe filevar=command; *pipe again to command location. What exactly does filevar= do?; FILEVAR IS A VARIABLE WHICH HOLD THE NAME OF THE FILE SO YOU CAN TELL IT TO READ DIFFERENT FILES, IT DOSNT HAVE TO BE HARD CODED.
rc = sleep(1);
end;
file dummy filevar=name dsd dlm=',';
do _n_ = 1 to 6;
put _n_:z5. name;
end; SO THIS CARDS STATEMENT IS JUST TEST DATA, YOU WONT USE THAT, YOU WILL USE WHAT YOU GET THROUGH THE PIPE.
cards; *read the cards. How do I get ALL the files I need here?;
.\xUsers\mherrin\Documents\trial\APX05132005.1
.\xUsers\mherrin\Documents\trial\APX05132005.2
.\xUsers\mherrin\Documents\trial\APX0513.DRF
.\xUsers\mherrin\Documents\trial\APX05142005.1
.\xUsers\mherrin\Documents\trial\APX05142005.2
.\xUsers\mherrin\Documents\trial\APX0514.DRF
;;;;
run;

*Read Type .1 (RACE) files;

data type1;
length command filename fname path $256.;
command = 'dir /s /b ".\xUsers"'; *dir has not been defined how does this work?; THIS IS THE COMMAND TO DOS.
infile dummy1 pipe filevar=command truncover;
input path $256.;
if scan(path,-1,'.') eq '1'; *selects only the .1 files;
put _infile_; *???;
infile dummy2 filevar=path filename=fname end=eof dsd;
filename = scan(fname,-2,'.\'); *fname does not exist in this data set at the end?;
length prefix $3.;
prefix = filename; *assumes prefix is always first 3 characters;
date = input (substr(filename,anydigit(filename)),mmddyy10.);
do while(not eof);
input obs name:$64.; ** Fields for type 1 files?;
output;
end;
format date date9.;
run;



In the end I get a data file with the variables you defined: filename, prefix, date, ... I know you wrote above to put in the field types for the data; do I need to first declare them in the length statement above? It does not seem to be reading any information from the file itself, only the file name so far.

Thanks.
data_null__
Jade | Level 19
> I wanted to walk through this, I am commenting it so
> maybe you can direct me in the things I do not
> understand.
>
>

This first data step creates "example" files. My attempt was to make a working example. If you submit this from a DM session it should create .\xUsers.... directories under the "current" directory. Then it just writes a few files that the next step can read. You already have these, so understanding how this works is not important at this time.

> dm 'clear log; clear output;';
> * create test data(files to be read in next step);
> data _null_;
> length command $256; *command will be the location
> n of the files;
> infile cards truncover; *tells SAS to read through
> h the cards later in the program;
> input name :$128.; *what it reads in will be called
> d 'name' What does the ':' do?
> if _n_ eq 1 then do;
> path =
> = quote(substr(name,1,find(name,'\',-length(name))));
> *define the path name;
> command = 'rmdir /S /Q ".\xUsers\"'; *don't
> t understand this step;
> infile dummy1 pipe filevar=command; *read all of
> of the files located in the command location (or open
> a pipe to that location);
> command=catx(' ','mkdir',path); *creates this
> is directory with the location designated in path;
> put command=;
> infile dummy1 pipe filevar=command; *pipe again to
> to command location. What exactly does filevar=
> do?;
> rc = sleep(1);
> end;
> file dummy filevar=name dsd dlm=',';
> do _n_ = 1 to 6;
> put _n_:z5. name;
> end;
> cards; *read the cards. How do I get ALL the files
> s I need here?;
> .\xUsers\mherrin\Documents\trial\APX05132005.1
> .\xUsers\mherrin\Documents\trial\APX05132005.2
> .\xUsers\mherrin\Documents\trial\APX0513.DRF
> .\xUsers\mherrin\Documents\trial\APX05142005.1
> .\xUsers\mherrin\Documents\trial\APX05142005.2
> .\xUsers\mherrin\Documents\trial\APX0514.DRF
> ;;;;
> run;
>

This next step is somewhat like what I think you will end up writing. It reads all file NAMEs from "a directory tree" that is specific to YOU, using DIR /S/B "path".

This particular step looks for files with file type .1; when they are found, it reads the files. This example just reads the two fields that were written in the first step. Your actual program will have a "much" longer input statement.

> *Read Type .1 (RACE) files;
>
> data type1;
> length command filename fname path $256.;
> command = 'dir /s /b ".\xUsers"'; *dir has not been
> n defined how does this work?;
> infile dummy1 pipe filevar=command truncover;
> input path $256.;
> if scan(path,-1,'.') eq '1'; *selects only the .1
> 1 files;
> put _infile_; *???;
> infile dummy2 filevar=path filename=fname end=eof
> f dsd;
> filename = scan(fname,-2,'.\'); *fname does not
> t exist in this data set at the end?;
> length prefix $3.;
> prefix = filename; *assumes prefix is always first 3
> 3 characters;
> date = input
> t (substr(filename,anydigit(filename)),mmddyy10.);
> do while(not eof);
> input obs name:$64.; ** Fields for type 1 files?;
> output;
> end;
> format date date9.;
> run;
>
>
>
> In the end I get a data file with the variables you
> defined; filename, prefix, date, ... I know you
> wrote above to put in the field types for the data do
> I need to first declare them in the lenght statement
> above? It does not seem to be reading any
> information from the file itself only the file name
> so far.
>
> Thanks.

sas-innovate-2024.png

Don't miss out on SAS Innovate - Register now for the FREE Livestream!

Can't make it to Vegas? No problem! Watch our general sessions LIVE or on-demand starting April 17th. Hear from SAS execs, best-selling author Adam Grant, Hot Ones host Sean Evans, top tech journalist Kara Swisher, AI expert Cassie Kozyrkov, and the mind-blowing dance crew iLuminate! Plus, get access to over 20 breakout sessions.

 

Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 23 replies
  • 7435 views
  • 0 likes
  • 3 in conversation