but , the code without using input() works . I import data using proc import , is this the reason?
proc import datafile = "C:\SAS data and program\data\data from web source\Geo-targeting\Data Files\test file.csv"
out = test
dbms = CSV
replace
;
run;
proc print data = test;
run;
proc import datafile = "C:\SAS data and program\data\data from web source\Geo-targeting\Data Files\Sales_Q1.csv"
out = Sales_Q1
dbms = CSV
replace
;
run;
proc means data=sales_q1;
var price;
run;
proc sql;
select
distinct
qtr(datepart(tran_date))
as quarter,
year(datepart(tran_date))
as year
from sales_q1;
quit;
proc import datafile = "C:\SAS data and program\data\data from web source\Geo-targeting\Data Files/locations.csv"
out = locations
dbms = CSV
replace
;
run;
proc import datafile = "C:\SAS data and program\data\data from web source\Geo-targeting\Data Files/postcode.csv"
out = postcode
dbms = CSV
replace
;
run;
proc sql;
create table sales_q1_loc as
select a.*, b.os_x as store_x, b.os_y as store_y
from sales_q1 a, locations b
where a.store_postcode = b.postcode;
quit;
proc sql;
create table sales_q1_loc_pos as
select a.*, b.os_x as cust_x, b.os_y as cust_y
from sales_q1_loc a left join postcode b
on a.customer_postcode = b.postcode;
quit;
data sales_check;
set sales_q1_loc_pos;
where store_x = . or
store_y = . or
cust_x = . or
cust_y = .;
keep customer_postcode store_postcode store_x store_y cust_x cust_y;
run;
proc print data = sales_check;
run;
proc contents data = apple.sales_q1;
run;
*******************************************************************************************************************
*******************************************************************************************************************
*******************************************************************************************************************
libname apple 'C:\SAS data and program\data\data from web source\Geo-targeting\Data Files';
data apple.computers;
infile 'C:\SAS data and program\data\data from web source\Geo-targeting\Data Files\computers.csv' dsd firstobs=2;
input configuration screen_size battery_hours ram processor_speed SSD $ HD_size $;
run;
data apple.locations;
infile 'C:\SAS data and program\data\data from web source\Geo-targeting\Data Files\locations.csv' dsd firstobs=2;
input Postcode $ OS_X OS_Y Lat Long ;
run;
data apple.postcode;
infile 'C:\SAS data and program\data\data from web source\Geo-targeting\Data Files\postcode.csv' dsd firstobs=2;
input Postcode $ OS_X OS_Y;
run;
data apple.sales_q1;
infile 'C:\SAS data and program\data\data from web source\Geo-targeting\Data Files\sales_q1.csv' dsd firstobs=2;
length tran_date $ 16;
input tran_date $ Configuration Customer_Postcode $ Store_Postcode $ month price;
run;
proc contents data = apple.sales_q1;
run;
proc sql;
select
distinct
qtr(datepart(input(tran_date,datetime18.)))
as quarter,
year(datepart(input(tran_date,datetime18.)))
as year
from apple.sales_q1;
quit;
proc sql;
create table sales_q1_loc as
select a.*, b.os_x as store_x, b.os_y as store_y
from apple.sales_q1 a, apple.locations b
where a.store_postcode = b.postcode;
quit;
proc print data = sales_q1_loc(obs=10);
run;
proc sql;
create table sales_q1_loc_pos as
select a.*, b.os_x as cust_x, b.os_y as cust_y
from sales_q1_loc a left join apple.postcode b
on a.customer_postcode = b.postcode;
quit;
data sales_check;
set sales_q1_loc_pos;
where store_x = . or
store_y = . or
cust_x = . or
cust_y = .;
keep customer_postcode store_postcode store_x store_y cust_x cust_y;
run;
proc means data = sales_check;
run;
The last proc (proc means) in the first code , I can detect 9698 missing value . The last proc (proc means) in the second code , I can detect 0 missing value.
But, I just use different way to read data from raw file:
the first code : proc import
the second code : data step
why ?
... View more