Matching to another data set

Reply
Frequent Contributor
Posts: 128

Matching to another data set

I brought in the csv files (had to split them up into three csv files because it kept aborting during FTP transfer with it all in one file). 

 

What I'm trying to do is bring in ALL the fields from the INST and NINST data pull (each have the same fields - this is coming from a claims database split into institutional and noninstitutional claims) and match it on CLMBASEID.

 

I started to type out what I think the match should look like, but not sure if I'm on the right track or not:

 

proc sort data=ninst.data  out=ninst2.data;  
   by clmbaseid;
run;

proc sort data=inst.data  out=inst2.data;
   by clmbaseid;
run;

data clmwh.data;  **claims warehouse**
  merge ninst2 (in=a)
             inst2 (in=b);
  by clmbaseid;
  if a and b;
run;

 

//CLMIDS  JOB (F0000000,44785,xxxxxxx,A2D),'xxxxxxx',                   
 //         CLASS=1,MSGCLASS=X,REGION=8M,TIME=NOLIMIT,                   
 //         RESTART=JS010.SAS,                                           
 //         NOTIFY=xxxxxxx
 //JS010    EXEC SAS,WORK='130000,130000',PARM=CAPSOUT                   
 //*                                                                     
 //INST   DD DSN=xxxxxxx.WRKLIB1.INST.CLMIDS.SASS,DISP=SHR               
 //PROJECT DD DSN=xxxxxxx.WRKLIB1.PROJECT.CLMIDS.SASS,DISP=SHR           
 //PROJECT1 DD DSN=xxxxxxx.PROJECT1.CSV,DISP=SHR                         
 //PROJECT2 DD DSN=xxxxxxx.PROJECT2.CSV,DISP=SHR                         
 //PROJECT3 DD DSN=xxxxxxx.PROJECT3.CSV,DISP=SHR                         
 //*                                                                     
 //SYSIN    DD *                                                         
 ********************************************************************;   
   OPTIONS NODATE LS=132 OBS=MAX SORT=2430 SORTPGM=BEST                  
           PS=59 NOBYLINE YEARCUTOFF=1920;                               
                                                                         
      **IMPORT RECOUPEMENTPROJECT EXCEL FILE**                           
       DATA PROJECT1;                                                    
         INFILE PROJECT1 DLM=',' DSD FIRSTOBS=2;                         
           INPUT FINARR      $2.                                         
               PROCESSTYPE $5.                                           
               TAXID       14                                            
               PSO         $1.                                           
               DEBTORNM    $35.                                          
               RCNNUM      $11.                                   
               CLMBASEID   $9.                                    
         RUN;                                                     
       DATA PROJECT2;                                             
         INFILE PROJECT2 DLM=',' DSD FIRSTOBS=2;                  
           INPUT FINARR      $2.                                  
               PROCESSTYPE $5.                                    
               TAXID       14                                     
               PSO         $1.                                    
               DEBTORNM    $35.                                   
               RCNNUM      $11.                                   
               CLMBASEID   $9.                                    
         RUN;                                                     
                                                                  
       DATA PROJECT3;                                             
         INFILE PROJECT1 DLM=',' DSD FIRSTOBS=2;                  
           INPUT FINARR      $2.                                  
               PROCESSTYPE $5.                                    
               TAXID       14                                     
               PSO         $1.                                    
               DEBTORNM    $35.                                   
               RCNNUM      $11.                                   
               CLMBASEID   $9.                                    
         RUN;                                                     
                                                                  
      DATA PROJECT.DATA;                                          
        MERGE PROJECT1 PROJECT2 PROJECT3;                                
        BY CLMBASEID;                                                    
        RUN;                                                             
                                                                         
      PROC PRINT DATA=PROJECT.DATA;                                      
         TITLE 'IMPORT RECOUPEMENT PROJECT';                             
      **NOTE - S/H 55744 DATA LINES**                                    
                                                                         
                                                                         
 //********************************************************************  
 //JS011    EXEC SAS,WORK='130000,130000',PARM=CAPSOUT                   
 //*                                                                     
 //INST  DD DSN=xxxxxxx.WRKLIB1.INSTCLMS.CLMIDS.SASS,                    
 //         DISP=(,CATLG,DELETE)                                         
 //*                                                                     
 //SYSIN    DD *                                                         
 ********************************************************************;   
   OPTIONS NODATE LS=132 OBS=MAX SORT=2430 SORTPGM=BEST                  
           PS=59 NOBYLINE YEARCUTOFF=1920;                               
                                                                         
      **EXTRACT FROM CLAIMS WAREHOUSE - INST**                           
         PROC SQL;                                                       
           CONNECT TO DB2(SSID=DBR0);                                    
             EXECUTE(SET CURRENT DEGREE='ANY') BY DB2;                   
                                                                         
           CREATE TABLE INST.DATA AS SELECT * FROM CONNECTION TO DB2                             
              (SELECT                                                    
                 HD.SR_PRIME_RCD_DATE AS CLMRECDT,                       
                 LN.CLM_BGN_SRVC_DT AS CLMSVCDT,                         
                 LN.CLM_END_SRVC_DT AS CLMENDDT,                         
                 LN.SPONSOR_SSN AS SPONSORSSN,                           
                 LN.PATNT_AMMS_ID_NO AS PATIENTID,                       
                 HD.PATNT_LAST_NAME AS PTLASTNAME,                       
                 HD.PATNT_FIRST_NAME AS PT FIRSTNAME,                    
                 HD.PATNT_DOB AS PTDOB,                                  
                 HD.PATNT_ADDR_LN_1 AS PTADD1,                           
                 HD.PATNT_ADDR_LN_2 AS PTADD2,                           
                 HD.PATNT_CITY AS PTCITY,                                
                 HD.PATNT_STATE AS PTSTATE,                              
                 HD.PATNT_ZIP_CODE AS PTZIP,                             
                 HD.ALLOWED_CHRG AS ALLCHRG,                             
                 LN.APRVD_AMT AS APRVDAMT,                               
                 LN.PAYMENT_AMT AS PYMTAMT,                              
                 HD.BLG_PRVDR_NAME AS BPROVNAME,                         
                 HD.BLG_PRVDR_NPI AS BPROVNPI,                           
                 HD.TAX_ID_NBR AS BPROVTIN,                              
                 HD.BLG_PRVDR_ST_CD AS BPROVST,                          
                 HD.BLG_PRVDR_ZIP_CD AS BPROVZIP,                        
                 HD.BLG_PRVDR_ADDR1 AS BPROVADD1,                        
                 HD.BLG_PRVDR_ADDR2 AS BPROVADD2,                        
                 LN.DRVD_RNDRG_PRV_NPI AS RPROVDVDNPI,         
             SELECT * FROM CONNECTION TO DB2                             
              (SELECT                                                    
                 HD.SR_PRIME_RCD_DATE AS CLMRECDT,                       
                 LN.CLM_BGN_SRVC_DT AS CLMSVCDT,                         
                 LN.CLM_END_SRVC_DT AS CLMENDDT,                         
                 LN.SPONSOR_SSN AS SPONSORSSN,                           
                 LN.PATNT_AMMS_ID_NO AS PATIENTID,                       
                 HD.PATNT_LAST_NAME AS PTLASTNAME,                       
                 HD.PATNT_FIRST_NAME AS PT FIRSTNAME,                    
                 HD.PATNT_DOB AS PTDOB,                                  
                 HD.PATNT_ADDR_LN_1 AS PTADD1,                           
                 HD.PATNT_ADDR_LN_2 AS PTADD2,                           
                 HD.PATNT_CITY AS PTCITY,                                
                 HD.PATNT_STATE AS PTSTATE,                              
                 HD.PATNT_ZIP_CODE AS PTZIP,                             
                 HD.ALLOWED_CHRG AS ALLCHRG,                             
                 LN.APRVD_AMT AS APRVDAMT,                               
                 LN.PAYMENT_AMT AS PYMTAMT,                              
                 HD.BLG_PRVDR_NAME AS BPROVNAME,                         
                 HD.BLG_PRVDR_NPI AS BPROVNPI,                           
                 HD.TAX_ID_NBR AS BPROVTIN,                              
                 HD.BLG_PRVDR_ST_CD AS BPROVST,                          
                 HD.BLG_PRVDR_ZIP_CD AS BPROVZIP,                        
                 HD.BLG_PRVDR_ADDR1 AS BPROVADD1,                        
                 HD.BLG_PRVDR_ADDR2 AS BPROVADD2,                        
                 LN.DRVD_RNDRG_PRV_NPI AS RPROVDVDNPI,                   
                 LN.REND_PROVIDER AS RPROVNBR,                           
                 LN.RNDRG_PRVDR_NPI AS RPROVNPI,                         
                 LN.RNDRG_PRV_FST_NAME AS RPROVFNAME,                    
                 LN.RNDRG_PRV_LST_NAME AS RPROVLNAME,                    
                 HD.PAY_TO_IND AS PAYTOIND,                              
                 HD.OHI_SUSPCTD_IND AS OHIIND,                           
                 HD.OHI_ALLWD_AMT AS OHIALWDAMT,                         
                 HD.TPL_ALLOW_AMT AS TPLALWDAMT,                         
                 HD.CHK_NUMBER AS CHECKNUM,                              
                 HD.CHK_NUMBER_2 AS CHECKNUM2,                           
                 HD.CLM_ADJ_ID AS CLMADJID,                              
                 HD.CLM_BASE_ID AS CLMBASEID                             
               FROM                                                      
                 LN INNER JOIN HD ON                                     
               (LN.CLM_BASE_ID=HD.CLM_BASE_ID AND                        
               LN.CLM_ADJ_ID=HD.CLM_ADJ_ID AND                           
               LN.CLM_TRANS_TYP=HD.CLM_TRANS_TYP                         
               AND LN.CHAP_CLM_SUB_NO=HD.CHAP_CLM_SUB_NO)                
                                                                         
               );                                                        
                                                                         
         %PUT &SQLXMSG &SQLXRC;                                          
                                                                         
         PROC PRINT UNIFORM DATA=INSTCLMS.DATA (OBS=10);                 
           TITLE1 "10 OBS FROM xxxxxxx.WRKLIB1.INSTCLMS.CLMIDS.SASS";    
           TITLE2                                                        
             "RECOUPEMENT SPECIAL PROJECT";                              
           TITLE3 "xxxxxxx.WRKLIB1.SAS(CLMIDS) - JS011";                 
         RUN;                                                            
                                                                                   
          
Super User
Posts: 10,278

Re: Matching to another data set

Don't use merge, use set:

data clmwh.data;  **claims warehouse**
set
  ninst2
  inst2
;
by clmbaseid;
run;

With the by statement, the two datasets will be interleaved and the sort order preserved.

---------------------------------------------------------------------------------------------
Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
How to post code
Frequent Contributor
Posts: 128

Re: Matching to another data set

Posted in reply to KurtBremser

Thank you!  Now I am wondering if I'm on the right track with this part below:

 

I need ALL of the fields from claims to come in and add to the original csv file, matching on clmbaseid.  This is what I have so far....

 

 OPTIONS NODATE LS=132 OBS=MAX SORT=2430 SORTPGM=BEST            
         PS=59 NOBYLINE YEARCUTOFF=1920;                         
                                                                 
  **COMBINE NINST2 AND INST2**                                   
                                                                 
  PROC SORT DATA=NINST.DATA  OUT=NINST2.DATA;                    
     BY CLMBASEID;                                               
  RUN;                                                           
                                                                 
  PROC SORT DATA=INST.DATA  OUT=INST2.DATA;                      
     BY CLMBASEID;                                               
  RUN;                                                           
                                                                 
  DATA CLMWH.DATA;                                               
   SET NINST2                                                    
       INST2;                                                    
   BY CLMBASEID;                                                 
  RUN;                                                           
                                                                 
  PROC SORT DATA=CLMWH.DATA OUT=CLMWH2.DATA; BY CLMBASEID; RUN;  
                                                                 
  PROC SORT DATA=PROJECT.DATA OUT=FINPROJ.DATA BY CLMBASEID; RUN;
                                                                 
  **MERGE CLMWH2 AND FINPROJ**                                   
  DATA CLMPROJ;                                        
   MERGE CLMWH2.DATA(IN=A)                             
         FINPROJ.DATA (IN=B);                          
   BY CLMBASEID;                                       
   IF A AND B;                                         
   RUN;                                                
                                                       
  PROC SQL                                             
   CREATE TABLE REC1.DATA AS                           
    SELECT ALL                                         
     FROM CLMWH.DATA AS A INNERJOIN CLMPROJ.DATA AS B  
       ON A.CLMBASEID=B.CLMBASEID; QUIT;  
Ask a Question
Discussion stats
  • 2 replies
  • 69 views
  • 0 likes
  • 2 in conversation