* SAS에서 Hadoop Access 관련 라이센스가 없는 경우 hive-jdbc.jar 파일을 통하여 접속 후 데이터 추출 방식;
* Hadoop 관리 IT 담당자 협조 필요
* Mvnrepository : https://mvnrepository.com/
* http://kshitish-bigdata.blogspot.com/2016/02/hive-jdbc-connection-and-jar-file-errors.html;
* JAR FILE PATH
* a. hive-jdbc.jar /usr/lib/hive/lib
* b. hive-service /usr/lib/hive/lib
* c. libthrift-0.9.2 /usr/lib/hive/lib
* d. commons-logging /usr/lib/hive/lib
* e. httpclient-4.2.5 /usr/lib/hive/lib
* f. httpcore /usr/lib/hive/lib
* g. hive-common /usr/lib/hive/lib
* h. hive-metastore.jar /usr/lib/hive/lib
* i. hadoop-common /usr/lib/hadoop/client-0.20
* j. slf4j-api /usr/lib/hadoop/client
* k. log4j /usr/lib/hadoop/lib
* l. slf4j-log4j /usr/lib/hadoop/lib;
* /usr/hdp/2.4.0.0-169/hadoop/client;
* /usr/hdp/2.4.0.0-169/hive/lib;
filename cp temp;
filename jwt1 'H:\SASDATA\02. BACK\ETC\TEST/hive-jdbc-1.2.1000.2.4.0.0-169.jar';
filename jwt2 'H:\SASDATA\02. BACK\ETC\TEST/hive-service-1.2.1000.2.4.0.0-169.jar';
filename jwt3 'H:\SASDATA\02. BACK\ETC\TEST/libthrift-0.9.2.jar';
filename jwt4 'H:\SASDATA\02. BACK\ETC\TEST/httpclient-4.4.jar';
filename jwt5 'H:\SASDATA\02. BACK\ETC\TEST/httpcore-4.4.jar';
filename jwt6 'H:\SASDATA\02. BACK\ETC\TEST/commons-logging-1.1.3.jar';
filename jwt7 'H:\SASDATA\02. BACK\ETC\TEST/hive-metastore-1.2.1000.2.4.0.0-169.jar';
filename jwt8 'H:\SASDATA\02. BACK\ETC\TEST/hive-common-1.2.1000.2.4.0.0-169.jar';
filename jwt9 'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository/hadoop-common-3.2.1.jar';
filename jwt10 'H:\SASDATA\02. BACK\ETC\TEST\slf4j-api-1.7.10.jar';
filename jwt11 'H:\SASDATA\02. BACK\ETC\TEST\guava-11.0.2.jar';
filename jwt14 'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository/opencsv-5.0.jar';
proc groovy;
add classpath=cp;
add classpath=jwt1;
add classpath=jwt2;
add classpath=jwt3;
add classpath=jwt4;
add classpath=jwt5;
add classpath=jwt6;
add classpath=jwt7;
add classpath=jwt8;
add classpath=jwt9;
add classpath=jwt10;
add classpath=jwt11;
add classpath=jwt14;
submit;
import groovy.sql.Sql;
import java.util.List;
import org.apache.hive.jdbc.HiveDriver;
import com.opencsv.CSVWriter;
/* 접속 정보 */
def db = [url: 'jdbc:hive2://ip(수정).ip(수정).co.kr:10000/default;',user:'hive',password:'패스워드', driver:'org.apache.hive.jdbc.HiveDriver']
def sql = Sql.newInstance(db.url, db.user, db.password, db.driver)
/* 추출을 위한 SQL 구문 */
List id = sql.rows(''' SELECT unique_key,
speaker,
pattern_1,
pattern_2,
pattern_3
FROM t_analsys_text_org
WHERE input_date between "2019-11-01 00:00:00" and "2019-11-01 23:59:59"
AND transcription_content != ''
LIMIT 200000 ''')
/* exports.putAt('ID',id); */
/* 출력 파일 지정 */
String csv = "H:\\SASDATA\\SA_AUTO_RENEWAL.csv";
CSVWriter writer = new CSVWriter(new FileWriter(csv));
/* CSV 파일 header 정보 지정 */
String [] header = new String[5];
header[0] = "unique_key";
header[1] = "speaker";
header[2] = "pattern_1";
header[3] = "pattern_2";
header[4] = "pattern_3";
writer.writeNext(header);
id.each {
String [] record = new String[5];
record[0] = it.unique_key.toString();
record[1] = it.speaker.toString();
record[2] = it.pattern_1.toString();
record[3] = it.pattern_2.toString();
record[4] = it.pattern_3.toString();
writer.writeNext(record);
}
writer.close();
endsubmit;
quit;
run;
/* 검증용 (verification, disabled): would echo the &ID macro variable to the log */
/*
data _NULL_;
put "----> &ID";
run;
*/
* 통계분석연구회 : http://cafe.daum.net/statsas ;
/* Site-footer text carried over from the SAS Communities paste -- not part of the program:
   Join us for SAS Innovate 2025, our biggest and most exciting global event of the year, in Orlando, FL, from May 6-9. Sign up by March 14 for just $795. */