BookmarkSubscribeRSS Feed

SAS에서 Hadoop HIVE 데이터 추출 방안(JAVA 사용)

Started ‎06-15-2020 by
Modified ‎06-15-2020 by
Views 342

SAS에서 Hadoop Access 관련 라이센스가 없는 경우 hive-jdbc.jar 파일을 통하여 접속 후 데이터 추출 방식;

* Hadoop 관리 IT 담당자 협조 필요

 

* Mvnrepository : https://mvnrepository.com/

 

* http://kshitish-bigdata.blogspot.com/2016/02/hive-jdbc-connection-and-jar-file-errors.html;

 

* JAR FILE     PATH

* a.  hive-jdbc.jar               /usr/lib/hive/lib

* b.  hive-service                /usr/lib/hive/lib

* c.  libthrift-0.9.2             /usr/lib/hive/lib

* d.  commons-logging             /usr/lib/hive/lib

* e.  httpclient-4.2.5            /usr/lib/hive/lib

* f.  httpcore                    /usr/lib/hive/lib

* g.  hive-common                 /usr/lib/hive/lib

* h.  hive-metastore.jar          /usr/lib/hive/lib

* i.  hadoop-common               /usr/lib/hadoop/client-0.20

* j.  slf4j-api                   /usr/lib/hadoop/client

* k.  log4j                       /usr/lib/hadoop/lib

* l.  slf4j-log4j                 /usr/lib/hadoop/lib;

 

* /usr/hdp/2.4.0.0-169/hadoop/client;

* /usr/hdp/2.4.0.0-169/hive/lib;

 

 

 

/* Assign filerefs for the Hive JDBC driver and all of its dependency JARs   */
/* so PROC GROOVY below can place them on the classpath.                     */
/* NOTE(review): paths mix '\' and '/' separators; Windows SAS accepts both, */
/* but confirm each JAR actually exists at the listed location.              */
filename cp temp;                 /* scratch fileref used as an (empty) classpath entry */

/* Hive JDBC driver and Hive/Thrift/HTTP client dependencies                 */
filename jwt1  'H:\SASDATA\02. BACK\ETC\TEST/hive-jdbc-1.2.1000.2.4.0.0-169.jar';

filename jwt2  'H:\SASDATA\02. BACK\ETC\TEST/hive-service-1.2.1000.2.4.0.0-169.jar';

filename jwt3  'H:\SASDATA\02. BACK\ETC\TEST/libthrift-0.9.2.jar';

filename jwt4  'H:\SASDATA\02. BACK\ETC\TEST/httpclient-4.4.jar';

filename jwt5  'H:\SASDATA\02. BACK\ETC\TEST/httpcore-4.4.jar';

filename jwt6  'H:\SASDATA\02. BACK\ETC\TEST/commons-logging-1.1.3.jar';

filename jwt7  'H:\SASDATA\02. BACK\ETC\TEST/hive-metastore-1.2.1000.2.4.0.0-169.jar';

filename jwt8  'H:\SASDATA\02. BACK\ETC\TEST/hive-common-1.2.1000.2.4.0.0-169.jar';

/* Hadoop common + logging facade (downloaded from mvnrepository.com)        */
filename jwt9  'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository/hadoop-common-3.2.1.jar';

filename jwt10 'H:\SASDATA\02. BACK\ETC\TEST\slf4j-api-1.7.10.jar';

/* Guava (Hive dependency) and OpenCSV (used for the CSV export below)      */
filename jwt11 'H:\SASDATA\02. BACK\ETC\TEST\guava-11.0.2.jar';

filename jwt14 'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository/opencsv-5.0.jar';

 

 

/* Extract rows from Hive over JDBC (no SAS/ACCESS to Hadoop license        */
/* required) and write them to a CSV file via PROC GROOVY + OpenCSV.        */
proc groovy;

/* Put the Hive JDBC driver and every dependency JAR on the classpath.      */
add classpath=cp;
add classpath=jwt1;
add classpath=jwt2;
add classpath=jwt3;
add classpath=jwt4;
add classpath=jwt5;
add classpath=jwt6;
add classpath=jwt7;
add classpath=jwt8;
add classpath=jwt9;
add classpath=jwt10;
add classpath=jwt11;
add classpath=jwt14;

    submit;
    import groovy.sql.Sql;
    import com.opencsv.CSVWriter;
    /* NOTE(review): java.util.List and org.apache.hive.jdbc.HiveDriver were
       imported before but are unused -- Groovy auto-imports java.util.*, and
       the driver is loaded by its class-name string below. */

    /* Connection settings -- replace host and password with real values.   */
    def db = [url: 'jdbc:hive2://ip(수정).ip(수정).co.kr:10000/default;',user:'hive',password:'패스워드', driver:'org.apache.hive.jdbc.HiveDriver']
    def sql = Sql.newInstance(db.url, db.user, db.password, db.driver)

    /* Output file and column layout (header order == record order).        */
    String csv = "H:\\SASDATA\\SA_AUTO_RENEWAL.csv";
    def cols = ['unique_key', 'speaker', 'pattern_1', 'pattern_2', 'pattern_3']

    CSVWriter writer = null
    try {
        /* Extraction query: one day of transcripts, capped at 200k rows.   */
        List rows = sql.rows(''' SELECT unique_key,
                                        speaker,
                                        pattern_1,
                                        pattern_2,
                                        pattern_3
                                 FROM   t_analsys_text_org
                                 WHERE  input_date between "2019-11-01 00:00:00" and "2019-11-01 23:59:59"
                                   AND  transcription_content != ''
                                 LIMIT 200000 ''')

        writer = new CSVWriter(new FileWriter(csv))
        writer.writeNext(cols as String[])

        /* String.valueOf() keeps NULL columns from throwing an NPE         */
        /* (the original it.field.toString() would fail on a NULL value).   */
        rows.each { row ->
            writer.writeNext(cols.collect { c -> String.valueOf(row[c]) } as String[])
        }
    } finally {
        /* Always release the file handle and the JDBC connection,         */
        /* even when the query or the write fails.                         */
        if (writer != null) writer.close()
        sql.close()
    }
    endsubmit;
quit;

      

      

/* 검증용 */

/* 
data _NULL_;

      put "----> &ID";

run; 
*/

 

 

* 출처 : http://statwith.com/sas%ec%97%90%ec%84%9c-hadoop-hive-%eb%8d%b0%ec%9d%b4%ed%84%b0-%ec%b6%94%ec%b6%9c...

 

* 통계분석연구회 : http://cafe.daum.net/statsas

Contributors
Version history
Last update:
‎06-15-2020 04:28 AM
Updated by:

sas-innovate-2026-white.png



April 27 – 30 | Gaylord Texan | Grapevine, Texas

Registration is open

Walk in ready to learn. Walk out ready to deliver. This is the data and AI conference you can't afford to miss.
Register now and save with the early bird rate—just $795!

Register now

Article Labels
Article Tags