Hi Everyone, I have a big data with 60,000,000 rows. The code is very slow, I tried to index some columns but it is still very slow. Any advice? should I use proc sql or data step for big data tables? Proc sql; create table adherant as select A.*, B.Fr_var from ses_id.INFORCE_AJUST_1 as A left join ses_id.FR_VARIABLE as B ON A.no_garantie = B.no_garantie; quit; proc sql; create index no_garantie on adherant (no_garantie); Quit; /* Ajout Frais administratif fixe */ Proc sql; create table adherant as select * from adherant as A left join ses_id.FR_Fixe as B ON A.no_garantie=B.no_garantie; quit; /* On rajoute la table des scenarios economiques */ /* On commence par rajouter un id de 1 a 1000, qui correspond aux IDs des scenarios */ data adherant ; set adherant; do scn_id=1 to 1000; output; end; run; proc sql; create index scn_id on adherant (scn_id); Quit; /* On rajoute les scenarios economiques */ Proc sql; create table adherant (drop=scn_id) as select A.* ,B.RendDEX as rend1 ,B.RendMM as rend2 ,B.RendTSX as rend3 ,B.RendSP500 as rend4 ,B.RendEAFE as rend5 ,B.TaskNum ,B.Time from adherant as A left join ses_id.Scenarios_ECN as B ON A.scn_id = B.TaskNum where Time>0 ; quit; /* Calcul de l'age par mois de scenarios economique, l'age doit etre entier et seulement s'incrementer de 1 au 12eme mois */ data adherant2 ; set adherant; age_adh = age_adh_ori + int((time-1)/12) ; Evaluation_dt = INTNX('month', &date_eval., (time-1), 'END'); duree_IE = min(max(int(YRDIF(date_acq_moyenne_IE,Evaluation_dt,'AGE'))+1,1),6); lapse_duree = min(max(int(YRDIF(Issue_Date,Evaluation_dt,'AGE'))+1,1),10); run; /*Ajout table Taux forward */ Proc sql; create table adherant as select A.* , B.facteur_act as fct_tx_fwrd from adherant as A left join ses_id.Scn_tx_forward as B ON (A.TaskNum = B.TaskNum and A.Time = B.Time) ; quit;
... View more