BookmarkSubscribeRSS Feed

SAS에서 Hadoop HIVE 데이터 추출 방안(JAVA 사용)

Started ‎06-15-2020 by
Modified ‎06-15-2020 by
Views 139

SAS에서 Hadoop Access 관련 라이센스가 없는 경우 hive-jdbc.jar 파일을 통하여 접속 후 데이터 추출 방식;

* Hadoop 관리 IT 담당자 협조 필요

 

* Mvnrepository : https://mvnrepository.com/

 

* http://kshitish-bigdata.blogspot.com/2016/02/hive-jdbc-connection-and-jar-file-errors.html;

 

* JAR FILE     PATH

* a.  hive-jdbc.jar               /usr/lib/hive/lib

* b.  hive-service                /usr/lib/hive/lib

* c.  libthrift-0.9.2             /usr/lib/hive/lib

* d.  commons-logging             /usr/lib/hive/lib

* e.  httpclient-4.2.5            /usr/lib/hive/lib

* f.  httpcore                    /usr/lib/hive/lib

* g.  hive-common                 /usr/lib/hive/lib

* h.  hive-metastore.jar          /usr/lib/hive/lib

* i.  hadoop-common               /usr/lib/hadoop/client-0.20

* j.  slf4j-api                   /usr/lib/hadoop/client

* k.  log4j                       /usr/lib/hadoop/lib

* l.  slf4j-log4j                 /usr/lib/hadoop/lib;

 

* /usr/hdp/2.4.0.0-169/hadoop/client;

* /usr/hdp/2.4.0.0-169/hive/lib;

 

 

 

/* File references for the Hive JDBC driver and its dependency JARs.      */
/* The versioned JARs were copied from the Hadoop cluster's               */
/* /usr/hdp/.../hive/lib and /usr/hdp/.../hadoop/client directories       */
/* (see the list above); the MVNrepository ones were downloaded manually. */
filename cp temp;                                                                  /* scratch fileref PROC GROOVY uses for its own classpath workspace */

filename jwt1  'H:\SASDATA\02. BACK\ETC\TEST/hive-jdbc-1.2.1000.2.4.0.0-169.jar';  /* Hive JDBC driver */

filename jwt2  'H:\SASDATA\02. BACK\ETC\TEST/hive-service-1.2.1000.2.4.0.0-169.jar';

filename jwt3  'H:\SASDATA\02. BACK\ETC\TEST/libthrift-0.9.2.jar';                 /* Thrift transport used by HiveServer2 */

filename jwt4  'H:\SASDATA\02. BACK\ETC\TEST/httpclient-4.4.jar';

filename jwt5  'H:\SASDATA\02. BACK\ETC\TEST/httpcore-4.4.jar';

filename jwt6  'H:\SASDATA\02. BACK\ETC\TEST/commons-logging-1.1.3.jar';

filename jwt7  'H:\SASDATA\02. BACK\ETC\TEST/hive-metastore-1.2.1000.2.4.0.0-169.jar';

filename jwt8  'H:\SASDATA\02. BACK\ETC\TEST/hive-common-1.2.1000.2.4.0.0-169.jar';

filename jwt9  'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository/hadoop-common-3.2.1.jar';

filename jwt10 'H:\SASDATA\02. BACK\ETC\TEST\slf4j-api-1.7.10.jar';                /* logging facade required by the driver */

 

filename jwt11 'H:\SASDATA\02. BACK\ETC\TEST\guava-11.0.2.jar';                    /* Guava, required by hive-common */

filename jwt14 'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository/opencsv-5.0.jar';       /* OpenCSV, used below to write the extract file */

 

 

/*----------------------------------------------------------------------*/
/* Extract rows from Hive over JDBC (inside PROC GROOVY) and write them */
/* to a local CSV file.  Requires the filerefs cp, jwt1..jwt14 above.   */
/*----------------------------------------------------------------------*/
proc groovy;

/* Put every driver/dependency JAR on the Groovy classpath. */
add classpath=cp;
add classpath=jwt1;
add classpath=jwt2;
add classpath=jwt3;
add classpath=jwt4;
add classpath=jwt5;
add classpath=jwt6;
add classpath=jwt7;
add classpath=jwt8;
add classpath=jwt9;
add classpath=jwt10;
add classpath=jwt11;
add classpath=jwt14;

    submit;
    import groovy.sql.Sql;
    import org.apache.hive.jdbc.HiveDriver;
    import com.opencsv.CSVWriter;

    /* Connection info -- replace the host and password placeholders before running. */
    def db = [url: 'jdbc:hive2://ip(수정).ip(수정).co.kr:10000/default;',user:'hive',password:'패스워드', driver:'org.apache.hive.jdbc.HiveDriver']
    def sql = Sql.newInstance(db.url, db.user, db.password, db.driver)
    CSVWriter writer = null

    try {
        /* Extraction query (capped at 200k rows for one day of data). */
        List id = sql.rows(''' SELECT unique_key,
                                     speaker,
                                     pattern_1,
                                     pattern_2,
                                     pattern_3
                              FROM   t_analsys_text_org
                              WHERE  input_date between "2019-11-01 00:00:00" and "2019-11-01 23:59:59"
                                AND  transcription_content != ''
                              LIMIT 200000 ''')

        /* Output file. */
        String csv = "H:\\SASDATA\\SA_AUTO_RENEWAL.csv";
        writer = new CSVWriter(new FileWriter(csv));

        /* Header row -- must match the SELECT column order. */
        String [] header = ["unique_key", "speaker", "pattern_1", "pattern_2", "pattern_3"] as String[];
        writer.writeNext(header);

        /* One CSV record per result row.  String.valueOf() renders SQL   */
        /* NULL columns as "null" instead of throwing the                 */
        /* NullPointerException that .toString() would.                   */
        id.each {
            String [] record = new String[5];
            record[0] = String.valueOf(it.unique_key);
            record[1] = String.valueOf(it.speaker);
            record[2] = String.valueOf(it.pattern_1);
            record[3] = String.valueOf(it.pattern_2);
            record[4] = String.valueOf(it.pattern_3);
            writer.writeNext(record);
        }
    } finally {
        /* Always flush/close the CSV and release the JDBC session, even  */
        /* when the query or the file write fails (the original leaked    */
        /* both on any exception).                                        */
        if (writer != null) writer.close();
        sql.close();
    }
    endsubmit;
quit;

      

      

/* 검증용 */

/* 
data _NULL_;

      put "----> &ID";

run; 
*/

 

 

* 출처 : http://statwith.com/sas%ec%97%90%ec%84%9c-hadoop-hive-%eb%8d%b0%ec%9d%b4%ed%84%b0-%ec%b6%94%ec%b6%9c...

 

* 통계분석연구회 : http://cafe.daum.net/statsas

Version history
Last update:
‎06-15-2020 04:28 AM
Updated by:
Contributors

sas-innovate-wordmark-2025-midnight.png

Register Today!

Join us for SAS Innovate 2025, our biggest and most exciting global event of the year, in Orlando, FL, from May 6-9. Sign up by March 14 for just $795.


Register now!

Article Labels
Article Tags