* How to connect through the hive-jdbc.jar file and extract data when SAS has no SAS/ACCESS Interface to Hadoop license;
* Requires cooperation from the IT staff who administer the Hadoop cluster
* Mvnrepository : https://mvnrepository.com/ (the jars can also be downloaded from there - see the PROC HTTP sketch below)
* http://kshitish-bigdata.blogspot.com/2016/02/hive-jdbc-connection-and-jar-file-errors.html;
* Required JAR files and their location on the Hadoop node (jar / path)
* a. hive-jdbc.jar /usr/lib/hive/lib
* b. hive-service /usr/lib/hive/lib
* c. libthrift-0.9.2 /usr/lib/hive/lib
* d. commons-logging /usr/lib/hive/lib
* e. httpclient-4.2.5 /usr/lib/hive/lib
* f. httpcore /usr/lib/hive/lib
* g. hive-common /usr/lib/hive/lib
* h. hive-metastore.jar /usr/lib/hive/lib
* i. hadoop-common /usr/lib/hadoop/client-0.20
* j. slf4j-api /usr/lib/hadoop/client
* k. log4j /usr/lib/hadoop/lib
* l. slf4j-log4j /usr/lib/hadoop/lib;
* Equivalent paths on an HDP 2.4 cluster:
*   /usr/hdp/2.4.0.0-169/hadoop/client;
*   /usr/hdp/2.4.0.0-169/hive/lib;
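/* The jars above can be copied from the cluster paths by the Hadoop administrators or, where the
   Maven coordinates are known, downloaded from Maven Central. A minimal sketch using PROC HTTP,
   assuming the opencsv 5.0 artifact and an illustrative JAROUT fileref - adjust the URL and the
   OUT= location for each jar actually needed. */
filename jarout 'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository\opencsv-5.0.jar';
proc http
  url="https://repo1.maven.org/maven2/com/opencsv/opencsv/5.0/opencsv-5.0.jar"
  method="GET"
  out=jarout;
run;
filename jarout clear;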
filename cp temp;
/* Local copies of the JDBC driver and its dependent jars (site-specific paths) */
filename jwt1 'H:\SASDATA\02. BACK\ETC\TEST\hive-jdbc-1.2.1000.2.4.0.0-169.jar';
filename jwt2 'H:\SASDATA\02. BACK\ETC\TEST\hive-service-1.2.1000.2.4.0.0-169.jar';
filename jwt3 'H:\SASDATA\02. BACK\ETC\TEST\libthrift-0.9.2.jar';
filename jwt4 'H:\SASDATA\02. BACK\ETC\TEST\httpclient-4.4.jar';
filename jwt5 'H:\SASDATA\02. BACK\ETC\TEST\httpcore-4.4.jar';
filename jwt6 'H:\SASDATA\02. BACK\ETC\TEST\commons-logging-1.1.3.jar';
filename jwt7 'H:\SASDATA\02. BACK\ETC\TEST\hive-metastore-1.2.1000.2.4.0.0-169.jar';
filename jwt8 'H:\SASDATA\02. BACK\ETC\TEST\hive-common-1.2.1000.2.4.0.0-169.jar';
filename jwt9 'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository\hadoop-common-3.2.1.jar';
filename jwt10 'H:\SASDATA\02. BACK\ETC\TEST\slf4j-api-1.7.10.jar';
filename jwt11 'H:\SASDATA\02. BACK\ETC\TEST\guava-11.0.2.jar';
filename jwt14 'H:\SASDATA\02. BACK\ETC\TEST\MVNrepository\opencsv-5.0.jar';
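/* Optional sanity check, assuming the filerefs above are assigned: FEXIST returns 1 when the
   file behind a fileref exists, so a quick log line catches a mistyped jar path. Repeat for
   jwt2-jwt14 as needed. */
%put NOTE: hive-jdbc jar found (1=yes): %sysfunc(fexist(jwt1));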
/* Put each jar fileref on the PROC GROOVY classpath */
proc groovy;
add classpath=cp;
add classpath=jwt1;
add classpath=jwt2;
add classpath=jwt3;
add classpath=jwt4;
add classpath=jwt5;
add classpath=jwt6;
add classpath=jwt7;
add classpath=jwt8;
add classpath=jwt9;
add classpath=jwt10;
add classpath=jwt11;
add classpath=jwt14;
submit;
import groovy.sql.Sql;
import java.util.List;
import org.apache.hive.jdbc.HiveDriver;
import com.opencsv.CSVWriter;
/* Connection information (replace host and password with site-specific values) */
def db = [url: 'jdbc:hive2://ip(edit).ip(edit).co.kr:10000/default;', user: 'hive', password: 'password(edit)', driver: 'org.apache.hive.jdbc.HiveDriver']
def sql = Sql.newInstance(db.url, db.user, db.password, db.driver)
/* SQL statement for the extract */
List id = sql.rows('''
    SELECT unique_key,
           speaker,
           pattern_1,
           pattern_2,
           pattern_3
      FROM t_analsys_text_org
     WHERE input_date BETWEEN "2019-11-01 00:00:00" AND "2019-11-01 23:59:59"
       AND transcription_content != ''
     LIMIT 200000
''')
/* exports.putAt('ID', id); */   /* optional: would return the result to SAS as the macro variable ID (see the verification step at the end) */
/* Output file location */
String csv = "H:\\SASDATA\\SA_AUTO_RENEWAL.csv";
CSVWriter writer = new CSVWriter(new FileWriter(csv));
/* CSV header row */
String [] header = new String[5];
header[0] = "unique_key";
header[1] = "speaker";
header[2] = "pattern_1";
header[3] = "pattern_2";
header[4] = "pattern_3";
writer.writeNext(header);
/* Write one CSV record per result row (the Elvis operator guards against NULL columns) */
id.each {
    String[] record = new String[5];
    record[0] = it.unique_key?.toString() ?: "";
    record[1] = it.speaker?.toString() ?: "";
    record[2] = it.pattern_1?.toString() ?: "";
    record[3] = it.pattern_2?.toString() ?: "";
    record[4] = it.pattern_3?.toString() ?: "";
    writer.writeNext(record);
}
writer.close();
endsubmit;
quit;
run;
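/* Once the Groovy step has written the CSV, it can be pulled back into a SAS data set.
   A minimal sketch, assuming the output path used above and a hypothetical target data
   set WORK.SA_AUTO_RENEWAL: */
proc import datafile='H:\SASDATA\SA_AUTO_RENEWAL.csv'
    out=work.sa_auto_renewal
    dbms=csv
    replace;
    getnames=yes;
run;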
/* Verification only: enable the exports.putAt() line above first, then check the exported macro variable */
/*
data _NULL_;
  put "----> &ID";
run;
*/
* 통계분석연구회 (Statistical Analysis Study Group) : http://cafe.daum.net/statsas;