BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
econ
Quartz | Level 8

I'm trying to read a windows zipped .txt file on Linux. My code returns no records. If i manually unzip the file and reference the location in my infile statement, it reads the data fine.

Any help is appreciated.

Doesn't work:

*filename orig pipe "zip http://www.freddiemac.com/mbs/data/stacr/ri130701_13dn01.zip";

filename orig pipe "zip -cd &mdir.data/ri130701_13dn01.zip";

data orig;

infile orig dsd dlm='|' missover firstobs=1 lrecl=32767;

if record_type = '00' then input

record_type $ 1 - 2

file_name $ 4 - 15

file_date 17 - 24;

if record_type = '10' then input

reference_pool_number $ 4 - 9

reference_pool_issuance_upb 11 - 25

reference_pool_factor 27 - 38

cummulative_ce_factor 40 - 51

total_def_on_ce 53 - 58

total_def_on_ce_upb 60 - 74

uw_def_on_ce 76 - 81

uw_def_on_ce_upb 83 - 97

uncfd_def_on_ce_resc 99 - 104

uncfd_def_on_ce_resc_upb 106 - 120;

if record_type = '20' then input

loan_identifier $ 4 - 15

product_type $ 17 - 21

seller_name $ 23 - 52

property_state $ 54 - 55

postal_code $ 57 - 61

msa 63 - 67

first_payment_date 69 - 74

maturity_date 76 - 81

original_loan_term 83 - 85

original_interest_rate 87 - 92

original_upb 94 - 105

upb_at_issuance 107 - 118

loan_purpose $ 120 - 120

channel $ 122 - 122

property_type $ 124 - 125

number_of_units 127 - 128

occupancy $ 130 - 130

number_of_borrowers 132 - 133

first_time_homebuyer $ 135 - 135

prepayment_penalty $ 137 - 137

credit_score 139 - 141

original_ltv 143 - 145

original_cltv 147 - 149

original_dti 151 - 153

mi_cov_pct 155 - 157;

if record_type = '50' then do;

input

servicer_name $ 17 - 46

loan_age 48 - 50

remaining_mos_to_maturity 52 - 54

adj_remaining_mos_to_maturity 56 - 58

current_delinquency_status 60 - 61

payment_history 63 - 74

current_interest_rate 76 - 81

current_actual_upb 83 - 94

current_interest_bearing_upb 96 - 107

upb_at_pool_removal 109 - 120

zero_balance_code 122 - 123

zero_balance_effective_date 125 - 130

uw_defect_settlement_date 132 - 137

modification_flag $ 139 - 139

;

output;

end;

retain _all_;

run;

Works:

data test;

infile "&mdir.data/ri130701_13dn01.txt";

input @1 record_type $2. @;

if record_type = '00' then input

record_type $ 1 - 2

file_name $ 4 - 15

file_date 17 - 24;

if record_type = '10' then input

reference_pool_number $ 4 - 9

reference_pool_issuance_upb 11 - 25

reference_pool_factor 27 - 38

cummulative_ce_factor 40 - 51

total_def_on_ce 53 - 58

total_def_on_ce_upb 60 - 74

uw_def_on_ce 76 - 81

uw_def_on_ce_upb 83 - 97

uncfd_def_on_ce_resc 99 - 104

uncfd_def_on_ce_resc_upb 106 - 120;

if record_type = '20' then input

loan_identifier $ 4 - 15

product_type $ 17 - 21

seller_name $ 23 - 52

property_state $ 54 - 55

postal_code $ 57 - 61

msa 63 - 67

first_payment_date 69 - 74

maturity_date 76 - 81

original_loan_term 83 - 85

original_interest_rate 87 - 92

original_upb 94 - 105

upb_at_issuance 107 - 118

loan_purpose $ 120 - 120

channel $ 122 - 122

property_type $ 124 - 125

number_of_units 127 - 128

occupancy $ 130 - 130

number_of_borrowers 132 - 133

first_time_homebuyer $ 135 - 135

prepayment_penalty $ 137 - 137

credit_score 139 - 141

original_ltv 143 - 145

original_cltv 147 - 149

original_dti 151 - 153

mi_cov_pct 155 - 157;

if record_type = '50' then do;

input

servicer_name $ 17 - 46

loan_age 48 - 50

remaining_mos_to_maturity 52 - 54

adj_remaining_mos_to_maturity 56 - 58

current_delinquency_status 60 - 61

payment_history 63 - 74

current_interest_rate 76 - 81

current_actual_upb 83 - 94

current_interest_bearing_upb 96 - 107

upb_at_pool_removal 109 - 120

zero_balance_code 122 - 123

zero_balance_effective_date 125 - 130

uw_defect_settlement_date 132 - 137

modification_flag $ 139 - 139

;

output;

end;

retain _all_;

run;

1 ACCEPTED SOLUTION

Accepted Solutions
ChrisHemedinger
Community Manager

The entire process can be accomplished with PROC HTTP and  FILENAME ZIP -- download and import, no external zip call needed.  This works in SAS 9.4.  I tested on Windows and on SAS University Edition (Linux).

/* Download the ZIP file */
filename dl "%sysfunc(getoption(WORK))/ri130701_13dn01.zip";

proc http
 
url="http://www.freddiemac.com/mbs/data/stacr/ri130701_13dn01.zip"
 
method='GET'
 
out=dl
 
ct="application/zip";
run;

filename dl clear;

/* Crack it open and read with FILENAME ZIP */
filename inzip ZIP "%sysfunc(getoption(WORK))/ri130701_13dn01.zip";

data orig;
  infile inzip(ri130701_13dn01.txt);
  input @1 record_type $2. @;

 
if record_type = '00' then
    input  record_type $
1 - 2 file_name $ 4 - 15 file_date 17 - 24;

 
if record_type = '10' then
    input
      reference_pool_number $
4 - 9
      reference_pool_issuance_upb
11 - 25
      reference_pool_factor
27 - 38
      cummulative_ce_factor
40 - 51
      total_def_on_ce
53 - 58
      total_def_on_ce_upb
60 - 74
      uw_def_on_ce
76 - 81
      uw_def_on_ce_upb
83 - 97
      uncfd_def_on_ce_resc
99 - 104
      uncfd_def_on_ce_resc_upb
106 - 120;

 
if record_type = '20' then
    input
      loan_identifier $
4 - 15
      product_type $
17 - 21
      seller_name $
23 - 52
      property_state $
54 - 55
      postal_code $
57 - 61
      msa
63 - 67
      first_payment_date
69 - 74
      maturity_date
76 - 81
      original_loan_term
83 - 85
      original_interest_rate
87 - 92
      original_upb
94 - 105
      upb_at_issuance
107 - 118
      loan_purpose $
120 - 120
      channel $
122 - 122
      property_type $
124 - 125
      number_of_units
127 - 128
      occupancy $
130 - 130
      number_of_borrowers
132 - 133
      first_time_homebuyer $
135 - 135
      prepayment_penalty $
137 - 137
      credit_score
139 - 141
      original_ltv
143 - 145
      original_cltv
147 - 149
      original_dti
151 - 153
      mi_cov_pct
155 - 157;

 
if record_type = '50' then
   
do;
     
input
        servicer_name $
17 - 46
        loan_age
48 - 50
        remaining_mos_to_maturity
52 - 54
        adj_remaining_mos_to_maturity
56 - 58
        current_delinquency_status
60 - 61
        payment_history
63 - 74
        current_interest_rate
76 - 81
        current_actual_upb
83 - 94
        current_interest_bearing_upb
96 - 107
        upb_at_pool_removal
109 - 120
        zero_balance_code
122 - 123
        zero_balance_effective_date
125 - 130
        uw_defect_settlement_date
132 - 137
        modification_flag $
139 - 139
      ;
      output;
   
end;

 
retain _all_;
run;

filename inzip clear;

More about FILENAME ZIP here:
and here:

Chris
Learn from the Experts! Check out the huge catalog of free sessions in the Ask the Expert webinar series.

View solution in original post

7 REPLIES 7
Tom
Super User Tom
Super User

What do you mean by not working?  Do you get an error message?

Are there other files in that ZIP file so that your unzip command is returning extra information? Perhaps the ZIP command is also sending messages that are getting read as part of the data.

Is it as simple as the file having different end-of-line characters?

econ
Quartz | Level 8

Not getting any errors. Here's my log from the one that doesn't work.

NOTE: The infile ORIG is:

      Pipe command="zip -cd /pm/ma/scott/fhlmc/stacr/data/ri130701_13dn01.zip"

NOTE: 3 records were read from the infile ORIG.

      The minimum record length was 0.

      The maximum record length was 77.

NOTE: The data set WORK.ORIG has 0 observations and 52 variables.

NOTE: DATA statement used (Total process time):

      real time           0.02 seconds

      cpu time            0.04 seconds

Tom
Super User Tom
Super User

Sounds like you are trying to read the error message from ZIP as the data.

Try this program to see what the three lines say.

data _null_;

   infile "zip -cd /pm/ma/scott/fhlmc/stacr/data/ri130701_13dn01.zip" pipe ;

   input;

   put _infile_;

run;

ChrisHemedinger
Community Manager

The entire process can be accomplished with PROC HTTP and  FILENAME ZIP -- download and import, no external zip call needed.  This works in SAS 9.4.  I tested on Windows and on SAS University Edition (Linux).

/* Download the ZIP file */
filename dl "%sysfunc(getoption(WORK))/ri130701_13dn01.zip";

proc http
 
url="http://www.freddiemac.com/mbs/data/stacr/ri130701_13dn01.zip"
 
method='GET'
 
out=dl
 
ct="application/zip";
run;

filename dl clear;

/* Crack it open and read with FILENAME ZIP */
filename inzip ZIP "%sysfunc(getoption(WORK))/ri130701_13dn01.zip";

data orig;
  infile inzip(ri130701_13dn01.txt);
  input @1 record_type $2. @;

 
if record_type = '00' then
    input  record_type $
1 - 2 file_name $ 4 - 15 file_date 17 - 24;

 
if record_type = '10' then
    input
      reference_pool_number $
4 - 9
      reference_pool_issuance_upb
11 - 25
      reference_pool_factor
27 - 38
      cummulative_ce_factor
40 - 51
      total_def_on_ce
53 - 58
      total_def_on_ce_upb
60 - 74
      uw_def_on_ce
76 - 81
      uw_def_on_ce_upb
83 - 97
      uncfd_def_on_ce_resc
99 - 104
      uncfd_def_on_ce_resc_upb
106 - 120;

 
if record_type = '20' then
    input
      loan_identifier $
4 - 15
      product_type $
17 - 21
      seller_name $
23 - 52
      property_state $
54 - 55
      postal_code $
57 - 61
      msa
63 - 67
      first_payment_date
69 - 74
      maturity_date
76 - 81
      original_loan_term
83 - 85
      original_interest_rate
87 - 92
      original_upb
94 - 105
      upb_at_issuance
107 - 118
      loan_purpose $
120 - 120
      channel $
122 - 122
      property_type $
124 - 125
      number_of_units
127 - 128
      occupancy $
130 - 130
      number_of_borrowers
132 - 133
      first_time_homebuyer $
135 - 135
      prepayment_penalty $
137 - 137
      credit_score
139 - 141
      original_ltv
143 - 145
      original_cltv
147 - 149
      original_dti
151 - 153
      mi_cov_pct
155 - 157;

 
if record_type = '50' then
   
do;
     
input
        servicer_name $
17 - 46
        loan_age
48 - 50
        remaining_mos_to_maturity
52 - 54
        adj_remaining_mos_to_maturity
56 - 58
        current_delinquency_status
60 - 61
        payment_history
63 - 74
        current_interest_rate
76 - 81
        current_actual_upb
83 - 94
        current_interest_bearing_upb
96 - 107
        upb_at_pool_removal
109 - 120
        zero_balance_code
122 - 123
        zero_balance_effective_date
125 - 130
        uw_defect_settlement_date
132 - 137
        modification_flag $
139 - 139
      ;
      output;
   
end;

 
retain _all_;
run;

filename inzip clear;

More about FILENAME ZIP here:
and here:

Chris
Learn from the Experts! Check out the huge catalog of free sessions in the Ask the Expert webinar series.
econ
Quartz | Level 8

Yes, I'm using proc http to download the data. I actually kick them all off at one time using X command.

Your fix did the trick! Weird I wonder if this is a windows zip issue. I have other pieces of code that read gzip just file.

filename orig zip "&mdir.data/ri130701_13dn01.zip";

data orig;

infile orig(ri130701_13dn01.txt);

ballardw
Super User

Since the code labeled "Doesn't work" doesn't have this line from "works"

input @1 record_type $2. @;

I would start there.

econ
Quartz | Level 8

Sorry I pasted it in incorrect.

filename orig pipe "zip -cd &mdir.data/ri130701_13dn01.zip";

data orig;

infile orig dsd dlm='|' missover firstobs=1 lrecl=32767;

input @1 record_type $2. @;

if record_type = '00' then input

record_type $ 1 - 2

file_name $ 4 - 15

file_date 17 - 24;

if record_type = '10' then input

reference_pool_number $ 4 - 9

reference_pool_issuance_upb 11 - 25

reference_pool_factor 27 - 38

cummulative_ce_factor 40 - 51

total_def_on_ce 53 - 58

total_def_on_ce_upb 60 - 74

uw_def_on_ce 76 - 81

uw_def_on_ce_upb 83 - 97

uncfd_def_on_ce_resc 99 - 104

uncfd_def_on_ce_resc_upb 106 - 120;

if record_type = '20' then input

loan_identifier $ 4 - 15

product_type $ 17 - 21

seller_name $ 23 - 52

property_state $ 54 - 55

postal_code $ 57 - 61

msa 63 - 67

first_payment_date 69 - 74

maturity_date 76 - 81

original_loan_term 83 - 85

original_interest_rate 87 - 92

original_upb 94 - 105

upb_at_issuance 107 - 118

loan_purpose $ 120 - 120

channel $ 122 - 122

property_type $ 124 - 125

number_of_units 127 - 128

occupancy $ 130 - 130

number_of_borrowers 132 - 133

first_time_homebuyer $ 135 - 135

prepayment_penalty $ 137 - 137

credit_score 139 - 141

original_ltv 143 - 145

original_cltv 147 - 149

original_dti 151 - 153

mi_cov_pct 155 - 157;

if record_type = '50' then do;

input

servicer_name $ 17 - 46

loan_age 48 - 50

remaining_mos_to_maturity 52 - 54

adj_remaining_mos_to_maturity 56 - 58

current_delinquency_status 60 - 61

payment_history 63 - 74

current_interest_rate 76 - 81

current_actual_upb 83 - 94

current_interest_bearing_upb 96 - 107

upb_at_pool_removal 109 - 120

zero_balance_code 122 - 123

zero_balance_effective_date 125 - 130

uw_defect_settlement_date 132 - 137

modification_flag $ 139 - 139

;

output;

end;

retain _all_;

run;

SAS Innovate 2025: Save the Date

 SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!

Save the date!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 7 replies
  • 1947 views
  • 0 likes
  • 4 in conversation