<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: PROC SQL join based on a common Substring in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/PROC-SQL-join-based-on-a-common-Substring/m-p/833410#M329457</link>
    <description>&lt;P&gt;Note: I have modified the code for A and B to include LENGTH statements.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data a;
    length dat $ 40;
     subj=1; dat='HI_76_KAM_BAK_887_PD34_KAR'; output;
     subj=2; dat='FT_01_S_NDRI_KAR'; output;
     subj=3; dat='DY_02_DEALIRITEON_843_KAR'; output;
     subj=4; dat='DY_02_RET_DEALIRITEON_ST_843_KAR'; output;
     subj=5; dat='BV_719_B_SIAFT_JH_500_KAR'; output;
     subj=6; dat='BV_303_B_SIAFT_500_KAR'; output;
     subj=7; dat='BV_711_B_SIAFT_KAR'; output;
     subj=8; dat='BV_714_XEMEYDE5_CESIC_890_CAS_KAR'; output;
     subj=9; dat='BV_714_MIL_XMD5_CSIC_CAS_KAR'; output;
     subj=10; dat='BV_714_TJIRSIC_CAS_KAR'; output;
     subj=11; dat='DY_30_KAM_BAK_YR_956_KAR'; output;
     subj=12; dat='DY_30_PIC_KAM_BAK_956_KAR'; output;
     subj=13; dat='AM_07_MH_KAR'; output;
     subj=14; dat='AM_53_JIDIDUM_DYDERS_KAR'; output;

    run;
    
    data b;
        length dat_new $ 24;
     id=1; dat_new='KAM_BAK_887'; other_var=1; output;
     id=2; dat_new='S_NDRI'; other_var=2; output;
     id=3; dat_new='DEALIRITEON_843'; other_var=3; output;
     id=4; dat_new='B_SIAFT_500'; other_var=11; output;
     id=5; dat_new='XEMEYDE5_CESIC_CAS'; other_var=21; output;
     id=6; dat_new='KAM_BAK_956'; other_var=25; output;
     id=7; dat_new='MH'; other_var=21; other_var=28; output;
     id=7; dat_new='JIDIDUM_DYDERS'; other_var=29; output;
run;

proc sql;
    create table want as select
        a.dat,
        b.dat_new
    from a left join b 
        on find(a.dat,b.dat_new,'t')&amp;gt;=1
        where not missing(b.dat_new);
quit;

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The reason that INDEXW (and also FINDW) will not work is that in record one, the words it looks at with the '_' delimiter are HI 76 KAM BAK 887 etc and it never finds one that matches KAM_BAK.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The approach in my code will fail if in data set A, there are values of DAT that contain MH, for example, if DAT='AMHERST' this will match data set b ID=7 DAT_NEW='MH'. I do not have at this moment a way to get around this.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Maybe regular expressions will have a better solution, but I leave that to others.&lt;/P&gt;</description>
    <pubDate>Wed, 14 Sep 2022 19:15:12 GMT</pubDate>
    <dc:creator>PaigeMiller</dc:creator>
    <dc:date>2022-09-14T19:15:12Z</dc:date>
    <item>
      <title>PROC SQL join based on a common Substring</title>
      <link>https://communities.sas.com/t5/SAS-Programming/PROC-SQL-join-based-on-a-common-Substring/m-p/833402#M329451</link>
      <description>&lt;P&gt;Dear all,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have two tables A&amp;nbsp; and B&lt;/P&gt;&lt;PRE&gt;  data a;
     subj=1; dat='HI_76_KAM_BAK_887_PD34_KAR'; output;
     subj=2; dat='FT_01_S_NDRI_KAR'; output;
     subj=3; dat='DY_02_DEALIRITEON_843_KAR'; output;
     subj=4; dat='DY_02_RET_DEALIRITEON_ST_843_KAR'; output;
     subj=5; dat='BV_719_B_SIAFT_JH_500_KAR'; output;
     subj=6; dat='BV_303_B_SIAFT_500_KAR'; output;
     subj=7; dat='BV_711_B_SIAFT_KAR'; output;
     subj=8; dat='BV_714_XEMEYDE5_CESIC_890_CAS_KAR'; output;
     subj=9; dat='BV_714_MIL_XMD5_CSIC_CAS_KAR'; output;
     subj=10; dat='BV_714_TJIRSIC_CAS_KAR'; output;
     subj=11; dat='DY_30_KAM_BAK_YR_956_KAR'; output;
     subj=12; dat='DY_30_PIC_KAM_BAK_956_KAR'; output;
     subj=13; dat='AM_07_MH_KAR'; output;
     subj=14; dat='AM_53_JIDIDUM_DYDERS_KAR'; output;

    run;
    
    data b;
     id=1; dat_new='KAM_BAK_887'; other_var=1; output;
     id=2; dat_new='S_NDRI'; other_var=2; output;
     id=3; dat_new='DEALIRITEON_843'; other_var=3; output;
     id=4; dat_new='B_SIAFT_500'; other_var=11; output;
     id=5; dat_new='XEMEYDE5_CESIC_CAS'; other_var=21; output;
     id=6; dat_new='KAM_BAK_956'; other_var=25; output;
     id=7; dat_new='MH'; other_var=21; other_var=28; output;
     id=7; dat_new='JIDIDUM_DYDERS'; other_var=29; output;
run;&lt;/PRE&gt;&lt;P&gt;I need to appropriately join them and get the table WANT as follows:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;data want;
dat=HI_76_KAM_BAK_887_PD34_KAR;    dat_new=KAM_BAK_887;output; 
dat=FT_01_S_NDRI_KAR;              dat_new=S_NDRI;output;
dat=DY_02_DEALIRITEON_843_KAR;     dat_new=DEALIRITEON_843;output;
dat=BV_303_B_SIAFT_500_KAR;        dat_new=B_SIAFT_500;output;
dat=BV_714_MIL_XMD5_CSIC_CAS_KAR;  dat_new=XEMEYDE5_CESIC_CAS;output;
dat=DY_30_PIC_KAM_BAK_956_KAR;     dat_new=KAM_BAK_956;output;
dat=AM_07_MH_KAR;                  dat_new=MH;output;
dat=AM_53_JIDIDUM_DYDERS_KAR;      dat_new=JIDIDUM_DYDERS;output;
run;&lt;/PRE&gt;&lt;P&gt;I have tried to use something like this within a PROC SQL join but I have failed.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;ifc(indexw(strip(dat),strip(dat_new),'_'),'OK','NOT OK');&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Any suggestions would be more than welcome.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanking you in advance.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 14 Sep 2022 18:38:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/PROC-SQL-join-based-on-a-common-Substring/m-p/833402#M329451</guid>
      <dc:creator>Zeus_Olympus</dc:creator>
      <dc:date>2022-09-14T18:38:17Z</dc:date>
    </item>
    <item>
      <title>Re: PROC SQL join based on a common Substring</title>
      <link>https://communities.sas.com/t5/SAS-Programming/PROC-SQL-join-based-on-a-common-Substring/m-p/833410#M329457</link>
      <description>&lt;P&gt;Note: I have modified the code for A and B to include LENGTH statements.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data a;
    length dat $ 40;
     subj=1; dat='HI_76_KAM_BAK_887_PD34_KAR'; output;
     subj=2; dat='FT_01_S_NDRI_KAR'; output;
     subj=3; dat='DY_02_DEALIRITEON_843_KAR'; output;
     subj=4; dat='DY_02_RET_DEALIRITEON_ST_843_KAR'; output;
     subj=5; dat='BV_719_B_SIAFT_JH_500_KAR'; output;
     subj=6; dat='BV_303_B_SIAFT_500_KAR'; output;
     subj=7; dat='BV_711_B_SIAFT_KAR'; output;
     subj=8; dat='BV_714_XEMEYDE5_CESIC_890_CAS_KAR'; output;
     subj=9; dat='BV_714_MIL_XMD5_CSIC_CAS_KAR'; output;
     subj=10; dat='BV_714_TJIRSIC_CAS_KAR'; output;
     subj=11; dat='DY_30_KAM_BAK_YR_956_KAR'; output;
     subj=12; dat='DY_30_PIC_KAM_BAK_956_KAR'; output;
     subj=13; dat='AM_07_MH_KAR'; output;
     subj=14; dat='AM_53_JIDIDUM_DYDERS_KAR'; output;

    run;
    
    data b;
        length dat_new $ 24;
     id=1; dat_new='KAM_BAK_887'; other_var=1; output;
     id=2; dat_new='S_NDRI'; other_var=2; output;
     id=3; dat_new='DEALIRITEON_843'; other_var=3; output;
     id=4; dat_new='B_SIAFT_500'; other_var=11; output;
     id=5; dat_new='XEMEYDE5_CESIC_CAS'; other_var=21; output;
     id=6; dat_new='KAM_BAK_956'; other_var=25; output;
     id=7; dat_new='MH'; other_var=21; other_var=28; output;
     id=7; dat_new='JIDIDUM_DYDERS'; other_var=29; output;
run;

proc sql;
    create table want as select
        a.dat,
        b.dat_new
    from a left join b 
        on find(a.dat,b.dat_new,'t')&amp;gt;=1
        where not missing(b.dat_new);
quit;

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The reason that INDEXW (and also FINDW) will not work is that in record one, the words it looks at with the '_' delimiter are HI 76 KAM BAK 887 etc and it never finds one that matches KAM_BAK.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The approach in my code will fail if in data set A, there are values of DAT that contain MH, for example, if DAT='AMHERST' this will match data set b ID=7 DAT_NEW='MH'. I do not have at this moment a way to get around this.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Maybe regular expressions will have a better solution, but I leave that to others.&lt;/P&gt;</description>
      <pubDate>Wed, 14 Sep 2022 19:15:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/PROC-SQL-join-based-on-a-common-Substring/m-p/833410#M329457</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2022-09-14T19:15:12Z</dc:date>
    </item>
    <item>
      <title>Re: PROC SQL join based on a common Substring</title>
      <link>https://communities.sas.com/t5/SAS-Programming/PROC-SQL-join-based-on-a-common-Substring/m-p/833412#M329458</link>
      <description>&lt;P&gt;What is the matching rule you wish to apply?&lt;/P&gt;</description>
      <pubDate>Wed, 14 Sep 2022 19:14:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/PROC-SQL-join-based-on-a-common-Substring/m-p/833412#M329458</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2022-09-14T19:14:14Z</dc:date>
    </item>
  </channel>
</rss>

