Reading windows zipped file using filename and pipe

Accepted Solution Solved
Reply
Contributor
Posts: 42
Accepted Solution

Reading windows zipped file using filename and pipe

I'm trying to read a windows zipped .txt file on Linux. My code returns no records. If i manually unzip the file and reference the location in my infile statement, it reads the data fine.

Any help is appreciated.

Doesn't work:

*filename orig pipe "zip http://www.freddiemac.com/mbs/data/stacr/ri130701_13dn01.zip";

filename orig pipe "zip -cd &mdir.data/ri130701_13dn01.zip";

data orig;

infile orig dsd dlm='|' missover firstobs=1 lrecl=32767;

if record_type = '00' then input

record_type $ 1 - 2

file_name $ 4 - 15

file_date 17 - 24;

if record_type = '10' then input

reference_pool_number $ 4 - 9

reference_pool_issuance_upb 11 - 25

reference_pool_factor 27 - 38

cummulative_ce_factor 40 - 51

total_def_on_ce 53 - 58

total_def_on_ce_upb 60 - 74

uw_def_on_ce 76 - 81

uw_def_on_ce_upb 83 - 97

uncfd_def_on_ce_resc 99 - 104

uncfd_def_on_ce_resc_upb 106 - 120;

if record_type = '20' then input

loan_identifier $ 4 - 15

product_type $ 17 - 21

seller_name $ 23 - 52

property_state $ 54 - 55

postal_code $ 57 - 61

msa 63 - 67

first_payment_date 69 - 74

maturity_date 76 - 81

original_loan_term 83 - 85

original_interest_rate 87 - 92

original_upb 94 - 105

upb_at_issuance 107 - 118

loan_purpose $ 120 - 120

channel $ 122 - 122

property_type $ 124 - 125

number_of_units 127 - 128

occupancy $ 130 - 130

number_of_borrowers 132 - 133

first_time_homebuyer $ 135 - 135

prepayment_penalty $ 137 - 137

credit_score 139 - 141

original_ltv 143 - 145

original_cltv 147 - 149

original_dti 151 - 153

mi_cov_pct 155 - 157;

if record_type = '50' then do;

input

servicer_name $ 17 - 46

loan_age 48 - 50

remaining_mos_to_maturity 52 - 54

adj_remaining_mos_to_maturity 56 - 58

current_delinquency_status 60 - 61

payment_history 63 - 74

current_interest_rate 76 - 81

current_actual_upb 83 - 94

current_interest_bearing_upb 96 - 107

upb_at_pool_removal 109 - 120

zero_balance_code 122 - 123

zero_balance_effective_date 125 - 130

uw_defect_settlement_date 132 - 137

modification_flag $ 139 - 139

;

output;

end;

retain _all_;

run;

Works:

data test;

infile "&mdir.data/ri130701_13dn01.txt";

input @1 record_type $2. @;

if record_type = '00' then input

record_type $ 1 - 2

file_name $ 4 - 15

file_date 17 - 24;

if record_type = '10' then input

reference_pool_number $ 4 - 9

reference_pool_issuance_upb 11 - 25

reference_pool_factor 27 - 38

cummulative_ce_factor 40 - 51

total_def_on_ce 53 - 58

total_def_on_ce_upb 60 - 74

uw_def_on_ce 76 - 81

uw_def_on_ce_upb 83 - 97

uncfd_def_on_ce_resc 99 - 104

uncfd_def_on_ce_resc_upb 106 - 120;

if record_type = '20' then input

loan_identifier $ 4 - 15

product_type $ 17 - 21

seller_name $ 23 - 52

property_state $ 54 - 55

postal_code $ 57 - 61

msa 63 - 67

first_payment_date 69 - 74

maturity_date 76 - 81

original_loan_term 83 - 85

original_interest_rate 87 - 92

original_upb 94 - 105

upb_at_issuance 107 - 118

loan_purpose $ 120 - 120

channel $ 122 - 122

property_type $ 124 - 125

number_of_units 127 - 128

occupancy $ 130 - 130

number_of_borrowers 132 - 133

first_time_homebuyer $ 135 - 135

prepayment_penalty $ 137 - 137

credit_score 139 - 141

original_ltv 143 - 145

original_cltv 147 - 149

original_dti 151 - 153

mi_cov_pct 155 - 157;

if record_type = '50' then do;

input

servicer_name $ 17 - 46

loan_age 48 - 50

remaining_mos_to_maturity 52 - 54

adj_remaining_mos_to_maturity 56 - 58

current_delinquency_status 60 - 61

payment_history 63 - 74

current_interest_rate 76 - 81

current_actual_upb 83 - 94

current_interest_bearing_upb 96 - 107

upb_at_pool_removal 109 - 120

zero_balance_code 122 - 123

zero_balance_effective_date 125 - 130

uw_defect_settlement_date 132 - 137

modification_flag $ 139 - 139

;

output;

end;

retain _all_;

run;


Accepted Solutions
Solution
‎05-20-2015 03:49 PM
Community Manager
Posts: 2,771

Re: Reading windows zipped file using filename and pipe

The entire process can be accomplished with PROC HTTP and  FILENAME ZIP -- download and import, no external zip call needed.  This works in SAS 9.4.  I tested on Windows and on SAS University Edition (Linux).

/* Download the ZIP file */
filename dl "%sysfunc(getoption(WORK))/ri130701_13dn01.zip";

proc http
 
url="http://www.freddiemac.com/mbs/data/stacr/ri130701_13dn01.zip"
 
method='GET'
 
out=dl
 
ct="application/zip";
run;

filename dl clear;

/* Crack it open and read with FILENAME ZIP */
filename inzip ZIP "%sysfunc(getoption(WORK))/ri130701_13dn01.zip";

data orig;
  infile inzip(ri130701_13dn01.txt);
  input @1 record_type $2. @;

 
if record_type = '00' then
    input  record_type $
1 - 2 file_name $ 4 - 15 file_date 17 - 24;

 
if record_type = '10' then
    input
      reference_pool_number $
4 - 9
      reference_pool_issuance_upb
11 - 25
      reference_pool_factor
27 - 38
      cummulative_ce_factor
40 - 51
      total_def_on_ce
53 - 58
      total_def_on_ce_upb
60 - 74
      uw_def_on_ce
76 - 81
      uw_def_on_ce_upb
83 - 97
      uncfd_def_on_ce_resc
99 - 104
      uncfd_def_on_ce_resc_upb
106 - 120;

 
if record_type = '20' then
    input
      loan_identifier $
4 - 15
      product_type $
17 - 21
      seller_name $
23 - 52
      property_state $
54 - 55
      postal_code $
57 - 61
      msa
63 - 67
      first_payment_date
69 - 74
      maturity_date
76 - 81
      original_loan_term
83 - 85
      original_interest_rate
87 - 92
      original_upb
94 - 105
      upb_at_issuance
107 - 118
      loan_purpose $
120 - 120
      channel $
122 - 122
      property_type $
124 - 125
      number_of_units
127 - 128
      occupancy $
130 - 130
      number_of_borrowers
132 - 133
      first_time_homebuyer $
135 - 135
      prepayment_penalty $
137 - 137
      credit_score
139 - 141
      original_ltv
143 - 145
      original_cltv
147 - 149
      original_dti
151 - 153
      mi_cov_pct
155 - 157;

 
if record_type = '50' then
   
do;
     
input
        servicer_name $
17 - 46
        loan_age
48 - 50
        remaining_mos_to_maturity
52 - 54
        adj_remaining_mos_to_maturity
56 - 58
        current_delinquency_status
60 - 61
        payment_history
63 - 74
        current_interest_rate
76 - 81
        current_actual_upb
83 - 94
        current_interest_bearing_upb
96 - 107
        upb_at_pool_removal
109 - 120
        zero_balance_code
122 - 123
        zero_balance_effective_date
125 - 130
        uw_defect_settlement_date
132 - 137
        modification_flag $
139 - 139
      ;
      output;
   
end;

 
retain _all_;
run;

filename inzip clear;

More about FILENAME ZIP here:
and here:

Chris

View solution in original post


All Replies
Super User
Super User
Posts: 6,502

Re: Reading windows zipped file using filename and pipe

What do you mean by not working?  Do you get an error message?

Are there other files in that ZIP file so that your unzip command is returning extra information? Perhaps the ZIP command is also sending messages that are getting read as part of the data.

Is it as simple as the file having different end-of-line characters?

Contributor
Posts: 42

Re: Reading windows zipped file using filename and pipe

Not getting any errors. Here's my log from the one that doesn't work.

NOTE: The infile ORIG is:

      Pipe command="zip -cd /pm/ma/scott/fhlmc/stacr/data/ri130701_13dn01.zip"

NOTE: 3 records were read from the infile ORIG.

      The minimum record length was 0.

      The maximum record length was 77.

NOTE: The data set WORK.ORIG has 0 observations and 52 variables.

NOTE: DATA statement used (Total process time):

      real time           0.02 seconds

      cpu time            0.04 seconds

Super User
Super User
Posts: 6,502

Re: Reading windows zipped file using filename and pipe

Sounds like you are trying to read the error message from ZIP as the data.

Try this program to see what the three lines say.

data _null_;

   infile "zip -cd /pm/ma/scott/fhlmc/stacr/data/ri130701_13dn01.zip" pipe ;

   input;

   put _infile_;

run;

Solution
‎05-20-2015 03:49 PM
Community Manager
Posts: 2,771

Re: Reading windows zipped file using filename and pipe

The entire process can be accomplished with PROC HTTP and  FILENAME ZIP -- download and import, no external zip call needed.  This works in SAS 9.4.  I tested on Windows and on SAS University Edition (Linux).

/* Download the ZIP file */
filename dl "%sysfunc(getoption(WORK))/ri130701_13dn01.zip";

proc http
 
url="http://www.freddiemac.com/mbs/data/stacr/ri130701_13dn01.zip"
 
method='GET'
 
out=dl
 
ct="application/zip";
run;

filename dl clear;

/* Crack it open and read with FILENAME ZIP */
filename inzip ZIP "%sysfunc(getoption(WORK))/ri130701_13dn01.zip";

data orig;
  infile inzip(ri130701_13dn01.txt);
  input @1 record_type $2. @;

 
if record_type = '00' then
    input  record_type $
1 - 2 file_name $ 4 - 15 file_date 17 - 24;

 
if record_type = '10' then
    input
      reference_pool_number $
4 - 9
      reference_pool_issuance_upb
11 - 25
      reference_pool_factor
27 - 38
      cummulative_ce_factor
40 - 51
      total_def_on_ce
53 - 58
      total_def_on_ce_upb
60 - 74
      uw_def_on_ce
76 - 81
      uw_def_on_ce_upb
83 - 97
      uncfd_def_on_ce_resc
99 - 104
      uncfd_def_on_ce_resc_upb
106 - 120;

 
if record_type = '20' then
    input
      loan_identifier $
4 - 15
      product_type $
17 - 21
      seller_name $
23 - 52
      property_state $
54 - 55
      postal_code $
57 - 61
      msa
63 - 67
      first_payment_date
69 - 74
      maturity_date
76 - 81
      original_loan_term
83 - 85
      original_interest_rate
87 - 92
      original_upb
94 - 105
      upb_at_issuance
107 - 118
      loan_purpose $
120 - 120
      channel $
122 - 122
      property_type $
124 - 125
      number_of_units
127 - 128
      occupancy $
130 - 130
      number_of_borrowers
132 - 133
      first_time_homebuyer $
135 - 135
      prepayment_penalty $
137 - 137
      credit_score
139 - 141
      original_ltv
143 - 145
      original_cltv
147 - 149
      original_dti
151 - 153
      mi_cov_pct
155 - 157;

 
if record_type = '50' then
   
do;
     
input
        servicer_name $
17 - 46
        loan_age
48 - 50
        remaining_mos_to_maturity
52 - 54
        adj_remaining_mos_to_maturity
56 - 58
        current_delinquency_status
60 - 61
        payment_history
63 - 74
        current_interest_rate
76 - 81
        current_actual_upb
83 - 94
        current_interest_bearing_upb
96 - 107
        upb_at_pool_removal
109 - 120
        zero_balance_code
122 - 123
        zero_balance_effective_date
125 - 130
        uw_defect_settlement_date
132 - 137
        modification_flag $
139 - 139
      ;
      output;
   
end;

 
retain _all_;
run;

filename inzip clear;

More about FILENAME ZIP here:
and here:

Chris
Contributor
Posts: 42

Re: Reading windows zipped file using filename and pipe

Yes, I'm using proc http to download the data. I actually kick them all off at one time using X command.

Your fix did the trick! Weird I wonder if this is a windows zip issue. I have other pieces of code that read gzip just file.

filename orig zip "&mdir.data/ri130701_13dn01.zip";

data orig;

infile orig(ri130701_13dn01.txt);

Super User
Posts: 10,549

Re: Reading windows zipped file using filename and pipe

Since the code labeled "Doesn't work" doesn't have this line from "works"

input @1 record_type $2. @;

I would start there.

Contributor
Posts: 42

Re: Reading windows zipped file using filename and pipe

Sorry I pasted it in incorrect.

filename orig pipe "zip -cd &mdir.data/ri130701_13dn01.zip";

data orig;

infile orig dsd dlm='|' missover firstobs=1 lrecl=32767;

input @1 record_type $2. @;

if record_type = '00' then input

record_type $ 1 - 2

file_name $ 4 - 15

file_date 17 - 24;

if record_type = '10' then input

reference_pool_number $ 4 - 9

reference_pool_issuance_upb 11 - 25

reference_pool_factor 27 - 38

cummulative_ce_factor 40 - 51

total_def_on_ce 53 - 58

total_def_on_ce_upb 60 - 74

uw_def_on_ce 76 - 81

uw_def_on_ce_upb 83 - 97

uncfd_def_on_ce_resc 99 - 104

uncfd_def_on_ce_resc_upb 106 - 120;

if record_type = '20' then input

loan_identifier $ 4 - 15

product_type $ 17 - 21

seller_name $ 23 - 52

property_state $ 54 - 55

postal_code $ 57 - 61

msa 63 - 67

first_payment_date 69 - 74

maturity_date 76 - 81

original_loan_term 83 - 85

original_interest_rate 87 - 92

original_upb 94 - 105

upb_at_issuance 107 - 118

loan_purpose $ 120 - 120

channel $ 122 - 122

property_type $ 124 - 125

number_of_units 127 - 128

occupancy $ 130 - 130

number_of_borrowers 132 - 133

first_time_homebuyer $ 135 - 135

prepayment_penalty $ 137 - 137

credit_score 139 - 141

original_ltv 143 - 145

original_cltv 147 - 149

original_dti 151 - 153

mi_cov_pct 155 - 157;

if record_type = '50' then do;

input

servicer_name $ 17 - 46

loan_age 48 - 50

remaining_mos_to_maturity 52 - 54

adj_remaining_mos_to_maturity 56 - 58

current_delinquency_status 60 - 61

payment_history 63 - 74

current_interest_rate 76 - 81

current_actual_upb 83 - 94

current_interest_bearing_upb 96 - 107

upb_at_pool_removal 109 - 120

zero_balance_code 122 - 123

zero_balance_effective_date 125 - 130

uw_defect_settlement_date 132 - 137

modification_flag $ 139 - 139

;

output;

end;

retain _all_;

run;

🔒 This topic is solved and locked.

Need further help from the community? Please ask a new question.

Discussion stats
  • 7 replies
  • 271 views
  • 0 likes
  • 4 in conversation