Hi,

Here is an example of using the FILENAME statement with the HADOOP access method. It shows the mechanics of writing a file to HDFS and reading it back.

/* FILENAME to Hadoop example */
/* Show the mechanics of writing to, and reading from, HDFS */
filename hdp1 hadoop 'test.txt' cfg="C:\Hadoop_cfg\hadoop.xml" user='bob';

/* Write the file to HDFS */
data _null_;
   file hdp1;
   put ' Test Test Test';
run;

/* Read the file from HDFS */
data test;
   infile hdp1;
   input textline $15.;
run;

Here is an example of the HADOOP procedure. It creates an HDFS directory, copies two text files into it, runs the sample WordCount MapReduce program against one of them, copies the result back to the laptop, and then cleans up.

filename cfg 'C:\Hadoop_cfg\hadoop.xml';

/* Set up the environment:                    */
/* create the /user/bob/Books directory and   */
/* copy moby_dick.txt and war_and_peace.txt   */
/* to HDFS                                    */
proc hadoop options=cfg username="bob" verbose;
   hdfs mkdir='/user/bob/Books';
   hdfs copyfromlocal="C:\Hadoop_data\moby_dick.txt"
        out='/user/bob/Books/moby_dick.txt';
   hdfs copyfromlocal="C:\Hadoop_data\war_and_peace.txt"
        out='/user/bob/Books/war_and_peace.txt';
run;

/* Run the WordCount sample program                    */
/* (hadoop-examples-1.2.0.1.3.0.0-96.jar) on Moby Dick */
proc hadoop options=cfg username="bob" verbose;
   mapreduce input='/user/bob/Books/moby_dick.txt'
             output='/user/bob/outBook'
             jar='C:\Hadoop_examples\hadoop-examples-1.2.0.1.3.0.0-96.jar'
             outputkey="org.apache.hadoop.io.Text"
             outputvalue="org.apache.hadoop.io.IntWritable"
             reduce="org.apache.hadoop.examples.WordCount$IntSumReducer"
             combine="org.apache.hadoop.examples.WordCount$IntSumReducer"
             map="org.apache.hadoop.examples.WordCount$TokenizerMapper";
run;

/* Copy the output from the MapReduce job to the laptop, */
/* then clean up the directories and files               */
proc hadoop options=cfg username="bob" password="Bogus" verbose;
   hdfs copytolocal="/user/bob/outBook/part-r-00000"
        out="C:\Hadoop_data\output\moby_dick_wordcount.txt" overwrite;
   hdfs delete='/user/bob/.staging';
   hdfs delete='/user/bob/Books';
   hdfs delete='/user/bob/outBook';
run;

Here are examples of using SAS/ACCESS Interface to Hadoop.

libname myhdp hadoop server=hdp13 subprotocol=hive2 user=myuser;

/* Display the SQL being sent to the database */
options sastrace=',,,d' sastraceloc=saslog nostsuffix;

/* CTAS: pass explicit HiveQL through to Hadoop */
proc sql;
   connect to hadoop (server=hdp13 user=myuser subprotocol=hive2);
   execute (create table myuser_store_cnt
            row format delimited fields terminated by '\001'
            stored as textfile as
            select customer_rk, count(*) as total_orders
            from order_fact
            group by customer_rk) by hadoop;
   disconnect from hadoop;
quit;

/* Create a SAS data set by joining two Hadoop tables */
proc sql;
   create table work.join_test as
      (select c.customer_rk, o.store_id
       from myhdp.customer_dim c, myhdp.order_fact o
       where c.customer_rk = o.customer_rk);
quit;

/* PROC FREQ example: load a SAS data set into Hive, */
/* then run PROC FREQ against it                      */
data myhdp.myuser_class;
   set sashelp.class;
run;

proc freq data=myhdp.myuser_class;
   tables sex * age;
   where age > 9;
   title 'Catchy Title Goes Here';
run;

/* Clean up */
proc sql;
   connect to hadoop (server=hdp13 user=myuser subprotocol=hive2);
   execute (drop table order_fact) by hadoop;
   execute (drop table customer_dim) by hadoop;
   execute (drop table myuser_store_cnt) by hadoop;
   execute (drop table myuser_class) by hadoop;
   drop table work.join_test;
   disconnect from hadoop;
quit;
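As a side note, you can also pull the WordCount results straight into a SAS data set instead of (or in addition to) copying the part file to the laptop, by combining the two ideas above: point a FILENAME HADOOP fileref at the MapReduce output and read it with INFILE. This is only a minimal sketch, not tested against your cluster; it assumes the same hadoop.xml configuration file and user as the earlier examples, and that the sample WordCount output is tab-delimited (word, then count). The wcout fileref, the moby_counts data set name, and the $40. informat are just illustrative choices.

/* Sketch: read the WordCount output directly from HDFS */
filename wcout hadoop '/user/bob/outBook/part-r-00000'
   cfg="C:\Hadoop_cfg\hadoop.xml" user='bob';

data work.moby_counts;
   infile wcout dlm='09'x truncover;   /* assumed layout: word <TAB> count */
   input word :$40. count;
run;

/* Show the ten most frequent words */
proc sort data=work.moby_counts;
   by descending count;
run;

proc print data=work.moby_counts(obs=10);
run;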