/*
 * Chained gradient-boosting pipeline.
 *
 * 1. Starts a CAS session and assigns librefs for all caslibs.
 * 2. Imports the modelling table (V0096C01.DATA) from XLSX, the term-count
 *    table (V0096C01.termnum) from CSV, and three CSV-backed tables
 *    DATA1/DATA2/DATA3 that PROC GRADBOOST later overwrites
 *    (score out=, outmodel=, savestate rstore=).
 * 3. Stores max(_TERMNUM_) in the macro variable TERM, which sets the upper
 *    bound of the hello1-hello<TERM> interval inputs.
 * 4. %create trains one model per consecutive cconverted<k> column found in
 *    V0096C01.DATA; model k also receives I_cconverted1..I_cconverted<k-1>
 *    from the previously scored tables as nominal inputs.
 * 5. Writes the number of targets processed to targetnumber.csv.
 */
options nonotes nosource;

cas mySession sessopts=(CASlib=casuser timeout=100000 locale="en_US");
caslib _all_ assign;

/*
 * _drop_if_exists: drop DS if it currently exists as a table and/or a view.
 *   ds - one- or two-level data set name (name literals such as
 *        LIB.'NAME'n are passed through unchanged).
 */
%macro _drop_if_exists(ds);
    proc sql;
        %if %sysfunc(exist(&ds)) %then %do;
            drop table &ds;
        %end;
        %if %sysfunc(exist(&ds,VIEW)) %then %do;
            drop view &ds;
        %end;
    quit;
%mend _drop_if_exists;

/* ---- Modelling table (XLSX) ------------------------------------------ */
%_drop_if_exists(V0096C01.'DATA'n)

options validvarname=any;

FILENAME REFFILE DISK '/sasdata/dev/VEA0096/Data/SASLib/SHEI/DATA_SCIENCE/Data_withtheme2.xlsx' lrecl=1000000;
proc import datafile=REFFILE dbms=xlsx out=V0096C01.'DATA'n;
    getnames=yes;
run;

/* ---- Term-count table (CSV) ------------------------------------------ */
%_drop_if_exists(V0096C01.termnum)

FILENAME REFFILE DISK '/sasdata/dev/VEA0096/Data/SASLib/SHEI/DATA_SCIENCE/termnum.csv';
proc import datafile=REFFILE dbms=csv out=V0096C01.termnum;
    getnames=yes;
run;

proc contents data=V0096C01.termnum;
run;

/* TRIMMED removes the blank padding INTO would otherwise store, so &term */
/* can be spliced directly into the variable list hello1-hello&term.      */
proc sql;
    select max(_TERMNUM_) into :term trimmed
    from V0096C01.termnum;
quit;

/* ---- Tables later overwritten by PROC GRADBOOST ----------------------- */
%_drop_if_exists(V0096C01.'DATA1'n)

FILENAME REFFILE DISK '/sasdata/dev/VEA0096/Data/SASLib/SHEI/DATA_SCIENCE/Data_withtheme2.csv' lrecl=1000000;
proc import datafile=REFFILE dbms=csv out=V0096C01.'DATA1'n;
    getnames=yes;
run;

proc contents data=V0096C01.'DATA1'n;
run;

%_drop_if_exists(V0096C01.'DATA2'n)

FILENAME REFFILE DISK '/sasdata/dev/VEA0096/Data/SASLib/SHEI/DATA_SCIENCE/Data_withtheme2.csv' lrecl=1000000;
proc import datafile=REFFILE dbms=csv out=V0096C01.'DATA2'n;
    getnames=yes;
run;

proc contents data=V0096C01.'DATA2'n;
run;

%_drop_if_exists(V0096C01.'DATA3'n)

/* NOTE(review): this path uses "Data-withtheme2" (hyphen) while every    */
/* other import uses "Data_withtheme2" (underscore) - confirm the file    */
/* name. Left unchanged because the intended file cannot be verified here. */
FILENAME REFFILE DISK '/sasdata/dev/VEA0096/Data/SASLib/SHEI/DATA_SCIENCE/Data-withtheme2.csv' lrecl=1000000;
proc import datafile=REFFILE dbms=csv out=V0096C01.'DATA3'n;
    getnames=yes;
run;

proc contents data=V0096C01.'DATA3'n;
run;

ods noproctitle;

/*
 * create: train one gradient-boosting model per target column.
 *
 *   howmany - accepted for call compatibility but not referenced; the
 *             number of iterations is the count of consecutive columns
 *             cconverted1, cconverted2, ... present in V0096C01.DATA.
 *
 * Reads the macro variable TERM from the enclosing scope.
 *
 * For each target x the macro:
 *   - trains and autotunes PROC GRADBOOST on V0096C01.DATA (x = 1) or on
 *     V0096C01.modified<x-1> (x > 1),
 *   - plots fit statistics and tuning history,
 *   - downloads the analytic store to disk,
 *   - joins the scored output onto the training table to build
 *     V0096C01.modified<x>,
 *   - re-creates V0096C01.DATA1 from CSV for the next pass.
 * Finally creates WORK.targetnumber holding the number of targets processed.
 */
%macro create(howmany);
    /* Keep working variables out of the global symbol table. */
    %local x y src_dsid close_rc n_targets train_ds prior_preds;

    /* Count the consecutive cconverted<k> columns up front, then close   */
    /* the handle so the source table is not held open during training.   */
    %let n_targets = 0;
    %let src_dsid = %sysfunc(open(V0096C01.DATA));
    %if &src_dsid > 0 %then %do;
        %let x = 1;
        %do %while (%sysfunc(varnum(&src_dsid, cconverted&x.)) > 0);
            %let x = %eval(&x + 1);
        %end;
        %let n_targets = %eval(&x - 1);
        %let close_rc = %sysfunc(close(&src_dsid));
    %end;
    %else %do;
        %put WARNING: create: could not open V0096C01.DATA - %qsysfunc(sysmsg());
    %end;

    %do x = 1 %to &n_targets;
        %let y = %eval(&x - 1);
        %put &x;

        /* The first pass trains on the raw table; later passes train on  */
        /* the previous pass output and add its I_cconverted* columns as  */
        /* nominal inputs.                                                 */
        %if &x = 1 %then %do;
            %let train_ds    = V0096C01.DATA;
            %let prior_preds = ;
        %end;
        %else %do;
            %let train_ds    = V0096C01.modified&y.;
            %let prior_preds = I_cconverted1-I_cconverted&y.;

            /* Log the row count of the previous pass output. */
            data _null_;
                if 0 then set V0096C01.modified&y. nobs=n;
                put n;
                stop;
            run;
        %end;

        proc gradboost data=&train_ds maxdepth=50 outmodel=V0096C01.DATA2;
            partition fraction(validate=0.12 test=0.12);
            target cconverted&x. / level=nominal;
            input hello1-hello&term. / level=interval;
            input &prior_preds
                  'Feedback Type'n 'Feedback Channel'n 'Digital Product'n
                  'Service Type'n 'Service Detail'n 'Reason Level 1'n
                  'Feedback Provider Type'n 'Responsible group'n
                  'Reason Level 2'n / level=nominal;
            autotune tuningparameters=(ntrees(init=150) samplingrate
                                       vars_to_try(UB=1000 init=300)
                                       learningrate lasso ridge)
                     objective=misc;
            ods output TunerResults=_tempTuneResults_;
            ods output EvaluationHistory=_tempEvalHistory_;
            ods output IterationHistory=_tempIterHistory_;
            ods output FitStatistics=Work._Gradboost_FitStats_;
            score out=V0096C01.DATA1 copyvars=(cconverted&x. Index);
            savestate rstore=V0096C01.DATA3;
            id Index;
        run;

        /* Training vs. validation misclassification by number of trees. */
        proc sgplot data=Work._Gradboost_FitStats_;
            title3 'Misclassifications by Number of Trees';
            title4 'Training vs. Validation';
            series x=Trees y=MiscTrain;
            series x=Trees y=MiscValid / lineattrs=(pattern=dot thickness=2);
            yaxis label='Misclassification Rate';
            label Trees='Number of Trees';
            label MiscTrain='Training';
            label MiscValid='Validation';
        run;

        title3;
        title 'Tune with Genetic algorithm';

        /* Row 1 of the tuner results is stored as DEFAULT, row 2 as BEST. */
        data _null_;
            set _tempTuneResults_;
            if _n_=1 then call symput('default', misclasserr);
            if _n_=2 then call symput('best', misclasserr);
        run;

        /* Add both values as constant columns so they plot as reference  */
        /* lines across the evaluation history.                            */
        data _tempEvalHistory_;
            set _tempEvalHistory_;
            default=&default;
            best=&best;
        run;

        proc sgplot data=_tempEvalHistory_;
            scatter x=evaluation y=misclasserr / group=iteration;
            series x=evaluation y=default / curvelabel="default"
                   lineattrs=(pattern=shortdash);
            series x=evaluation y=best / curvelabel="best"
                   lineattrs=(pattern=shortdash color=green);
        run;

        proc sgplot data=_tempIterHistory_;
            series x=iteration y=best_obj;
            series x=iteration y=time_sec / y2axis;
        run;

        proc delete data=_tempTuneResults_ _tempEvalHistory_ _tempIterHistory_
                         Work._Gradboost_FitStats_;
        run;

        /* NOTE(review): "&x." consumes the dot, so the file is written as */
        /* svm1store<x>sasast with no "." before "sasast"; "&x..sasast"    */
        /* would be needed for a ".sasast" extension. Left unchanged       */
        /* because consumers of these files are not visible here.          */
        proc astore;
            download rstore=V0096C01.DATA3
                     store="/sasdata/dev/VEA0096/Data/SASLib/SHEI/DATA_SCIENCE/svm1store&x.sasast";
        quit;

        /* Append the scored columns to the table this pass trained on.   */
        /* Both sides carry Index and cconverted&x. (see copyvars= above), */
        /* so SELECT * yields duplicate column names here.                 */
        proc sql;
            create table modified&x. as
            select *
            from &train_ds as base
            inner join V0096C01.DATA1 as scored
                on base.index = scored.index;
        quit;

        data V0096C01.modified&x.;
            set modified&x.;
        run;

        /* Re-create V0096C01.DATA1 from CSV before the next pass. */
        %_drop_if_exists(V0096C01.'DATA1'n)

        FILENAME REFFILE DISK '/sasdata/dev/VEA0096/Data/SASLib/SHEI/DATA_SCIENCE/Data_withtheme2.csv' lrecl=1000000;
        proc import datafile=REFFILE dbms=csv out=V0096C01.'DATA1'n;
            getnames=yes;
        run;
    %end;

    /* Record how many targets were processed. */
    data targetnumber;
        number=&n_targets;
    run;
%mend create;

%create(10);

/* The output file is named .csv, so export as CSV (the original wrote    */
/* DBMS=XLSX into this .csv file).                                         */
filename xout DISK '/sasdata/dev/VEA0096/Data/SASLib/SHEI/DATA_SCIENCE/targetnumber.csv' lrecl=1000000;
proc export outfile=xout data=targetnumber dbms=csv replace;
run;