<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: if observation in one dataset is in another dataset in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866788#M342328</link>
    <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/19879"&gt;@Quentin&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13879"&gt;@Reeza&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Can I use the merge step for multiple dataset. For example:&lt;/P&gt;&lt;P&gt;data want;&lt;BR /&gt;merge&lt;BR /&gt;a (keep= id in=a)&lt;BR /&gt;b (keep= id in=b)&lt;BR /&gt;c (keep= id in=c)&lt;BR /&gt;d (keep= id in=d)&lt;BR /&gt;e (keep= id in=e)&lt;BR /&gt;; by id ;&lt;BR /&gt;ina=a;&lt;BR /&gt;inb=b;&lt;BR /&gt;inc=c;&lt;BR /&gt;ind=d;&lt;BR /&gt;ine=e;&lt;BR /&gt;run;&lt;/P&gt;</description>
    <pubDate>Tue, 28 Mar 2023 17:48:16 GMT</pubDate>
    <dc:creator>CathyVI</dc:creator>
    <dc:date>2023-03-28T17:48:16Z</dc:date>
    <item>
      <title>if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866773#M342324</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I have two large dataset A &amp;amp; B and they both have ID number. I want to check if ID number in data A are in data B or vise-versa. I want to do this before merging the data. I think I can use proc sql with distinct but am still learning sql and not sure with the coding. Please help.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 17:09:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866773#M342324</guid>
      <dc:creator>CathyVI</dc:creator>
      <dc:date>2023-03-28T17:09:35Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866777#M342325</link>
      <description>&lt;P&gt;Why don't&amp;nbsp; you want to merge the data? It's straight forward with merge, e.g.:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  merge 
    a (keep= id in=a)
    b (keep= id in=b)
  ;&lt;BR /&gt;  by id ;
  ina=a;
  inb=b;
run;

proc freq data=want;
  tables ina*inb ;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 28 Mar 2023 17:25:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866777#M342325</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2023-03-28T17:25:18Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866778#M342326</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table want as
select a.id, b.id, case when missing(b.id) then 'Not in B'
                                   when missing(a.id) then 'Not in A'
                                   else 'Both' end as status
from (select distinct id from table1) as a
full join (select distinct id from table2) as b
on a.id=b.id;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Something like that would work.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Merges also work but require you sort the data ahead of time.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 17:27:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866778#M342326</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2023-03-28T17:27:17Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866788#M342328</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/19879"&gt;@Quentin&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13879"&gt;@Reeza&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Can I use the merge step for multiple dataset. For example:&lt;/P&gt;&lt;P&gt;data want;&lt;BR /&gt;merge&lt;BR /&gt;a (keep= id in=a)&lt;BR /&gt;b (keep= id in=b)&lt;BR /&gt;c (keep= id in=c)&lt;BR /&gt;d (keep= id in=d)&lt;BR /&gt;e (keep= id in=e)&lt;BR /&gt;; by id ;&lt;BR /&gt;ina=a;&lt;BR /&gt;inb=b;&lt;BR /&gt;inc=c;&lt;BR /&gt;ind=d;&lt;BR /&gt;ine=e;&lt;BR /&gt;run;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 17:48:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866788#M342328</guid>
      <dc:creator>CathyVI</dc:creator>
      <dc:date>2023-03-28T17:48:16Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866795#M342329</link>
      <description>&lt;P&gt;This creates two data sets, I hope the names are descriptive enough, that contain any ID only in one set if such exist:&lt;/P&gt;
&lt;PRE&gt;proc sql;
   create table ina_notb as
   select distinct id from a
   except
   select distinct id from b
   ;
   create table inb_nota as
   select distinct id from b
   except
   select distinct id from a
   ;
quit;&lt;/PRE&gt;
&lt;P&gt;Except says "get everything from the first query except those in the second query".&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 17:53:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866795#M342329</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2023-03-28T17:53:05Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866798#M342330</link>
      <description>&lt;P&gt;Yes, you can merge multiple datasets in a single MERGE step.&amp;nbsp; Each dataset should be unique by ID.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;After the merge, can get a nice summary of missing data patterns with by using the LIST option:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc freq data=want ;
  tables ina*inb*inc*ind*ine /list;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 17:56:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866798#M342330</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2023-03-28T17:56:58Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866805#M342331</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/19879"&gt;@Quentin&lt;/a&gt;&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13879"&gt;@Reeza&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you. This is my code below. However, for efficiency is there a better way to practice this code of mine.&lt;/P&gt;&lt;P&gt;data out_reven_06_19;&lt;BR /&gt;merge a.out_reven_2006 (in=a)&lt;BR /&gt;a.out_reven_2007 (in=b)&lt;BR /&gt;a.out_reven_2008 (in=c)&lt;BR /&gt;a.out_reven_2009 (in=d)&lt;BR /&gt;a.out_reven_2010 (in=e)&lt;BR /&gt;a.out_reven_2011 (in=f)&lt;BR /&gt;a.out_reven_2012 (in=g)&lt;BR /&gt;a.out_reven_2013 (in=h)&lt;BR /&gt;a.out_reven_2014 (in=i)&lt;BR /&gt;a.out_reven_2015 (in=j)&lt;BR /&gt;a.out_reven_2016 (in=k)&lt;BR /&gt;a.out_reven_2017 (in=l)&lt;BR /&gt;a.out_reven_2018 (in=m)&lt;BR /&gt;a.out_reven_2019 (in=n)&lt;BR /&gt;;&lt;BR /&gt;by PATIENT_ID;&lt;BR /&gt;ina=a;&lt;BR /&gt;inb=b;&lt;BR /&gt;inc=c;&lt;BR /&gt;ind=d;&lt;BR /&gt;ine=e;&lt;BR /&gt;inf=f;&lt;BR /&gt;ing=g;&lt;BR /&gt;inh=h;&lt;BR /&gt;ini=i;&lt;BR /&gt;inj=j;&lt;BR /&gt;ink=k;&lt;BR /&gt;inl=l;&lt;BR /&gt;inm=m;&lt;BR /&gt;inn=n;&lt;BR /&gt;run;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 18:01:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866805#M342331</guid>
      <dc:creator>CathyVI</dc:creator>
      <dc:date>2023-03-28T18:01:02Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866810#M342332</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data ids;
set a b c d e indsname =source;
dsn=source;
keep id dsn;
run;

*deduplicate if desired;
proc sort data=ids nodupkey;
by id dsn;
run;

proc freq data=ids;
table id * dsn;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Another method where it doesn't matter how many data sets....&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 18:03:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866810#M342332</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2023-03-28T18:03:17Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866870#M342362</link>
      <description>&lt;P&gt;Fourteen data sets is quite a bit different than the two in your original post.&lt;/P&gt;
&lt;P&gt;This will create a report as the last step with the patient_id as a column and the data set names across the top with a 1 where the id is in the set.&lt;/P&gt;
&lt;PRE&gt;data out_reven_06_19;
   set a.out_reven_2006-a.out_reven_2019 
       indsname=dsname;
   source = dsname;
run;

proc sql;
   create idtable as
   select distinct patient_id, source
   from out_reven_06-19
   ;
quit;

proc tabulate data=idtable;
   class patient_id source;
   table patient_id,
         source*n=' '
         /misstext=' '
   ;
run;
&lt;/PRE&gt;
&lt;P&gt;The a.out_reven_2006-a.out_reven_2019&amp;nbsp; on the Set statement is a shorthand list of the data set names. The option INDSNAME adds a temporary variable to the data with the name of the contributing data set and the Source= creates a permanent variable.&lt;/P&gt;
&lt;P&gt;The IDtable gets one record per Patient_id and Source value.&lt;/P&gt;
&lt;P&gt;The Proc Tabulate step creates a report.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You haven't really stated what piece of information is actually needed so I have this generic report so you can see just about any result.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Note that if you had asked for something like "which IDs are in ALL the sets" or "which Id are not all the sets" then we could do a summary count of the Patient_id and Source variable and select appropriate values to report on.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 20:16:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866870#M342362</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2023-03-28T20:16:50Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866881#M342368</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13879"&gt;@Reeza&lt;/a&gt;&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/19879"&gt;@Quentin&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I joined using the methods below but I got different numbers of observations &amp;amp; variables. Please why is that.&lt;/P&gt;&lt;P&gt;(1)&lt;/P&gt;&lt;P&gt;/*26,036,675 observations and 66 variables*/&lt;/P&gt;&lt;P&gt;data combined_out_reven;&amp;nbsp;&lt;BR /&gt;merge a.out_reven_2006 (in=a)&lt;BR /&gt;a.out_reven_2007 (in=b)&lt;BR /&gt;a.out_reven_2008 (in=c)&lt;BR /&gt;a.out_reven_2009 (in=d)&lt;BR /&gt;a.out_reven_2010 (in=e)&lt;BR /&gt;a.out_reven_2011 (in=f)&lt;BR /&gt;a.out_reven_2012 (in=g)&lt;BR /&gt;a.out_reven_2013 (in=h)&lt;BR /&gt;a.out_reven_2014 (in=i)&lt;BR /&gt;a.out_reven_2015 (in=j)&lt;BR /&gt;a.out_reven_2016 (in=k)&lt;BR /&gt;a.out_reven_2017 (in=l)&lt;BR /&gt;a.out_reven_2018 (in=m)&lt;BR /&gt;a.out_reven_2019 (in=n)&lt;BR /&gt;;&lt;BR /&gt;by PATIENT_ID;&lt;BR /&gt;ina=a;&lt;BR /&gt;inb=b;&lt;BR /&gt;inc=c;&lt;BR /&gt;ind=d;&lt;BR /&gt;ine=e;&lt;BR /&gt;inf=f;&lt;BR /&gt;ing=g;&lt;BR /&gt;inh=h;&lt;BR /&gt;ini=i;&lt;BR /&gt;inj=j;&lt;BR /&gt;ink=k;&lt;BR /&gt;inl=l;&lt;BR /&gt;inm=m;&lt;BR /&gt;inn=n;&lt;BR /&gt;run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;(2)&lt;/P&gt;&lt;P&gt;/*71,772,502 observations and 53 variables*/&lt;/P&gt;&lt;P&gt;data ids;&amp;nbsp;&lt;BR /&gt;set a.out_reven_2006&lt;BR /&gt;a.out_reven_2007&lt;BR /&gt;a.out_reven_2008&lt;BR /&gt;a.out_reven_2009&lt;BR /&gt;a.out_reven_2010&lt;BR /&gt;a.out_reven_2011&lt;BR /&gt;a.out_reven_2012&lt;BR /&gt;a.out_reven_2013&lt;BR /&gt;a.out_reven_2014&lt;BR /&gt;a.out_reven_2015&lt;BR /&gt;a.out_reven_2016&lt;BR /&gt;a.out_reven_2017&lt;BR /&gt;a.out_reven_2018&lt;BR /&gt;a.out_reven_2019 indsname =source;&lt;BR /&gt;dsn=source;&lt;BR /&gt;by PATIENT_ID;&lt;BR /&gt;run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 21:43:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866881#M342368</guid>
      <dc:creator>CathyVI</dc:creator>
      <dc:date>2023-03-28T21:43:18Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866884#M342370</link>
      <description>MERGE is different operation than SET. You should not expect the same number of rows using the two different methods. &lt;BR /&gt;&lt;BR /&gt;SET will just stack the data sets together. It should not have a BY statement in this example/usage. &lt;BR /&gt;&lt;BR /&gt;MERGE tries to join the data sets by the key variable and any duplicate variables are overwritten. &lt;BR /&gt;&lt;BR /&gt;For your question you should limit the variable to just your ID anyways otherwise you're wasting space.</description>
      <pubDate>Tue, 28 Mar 2023 21:57:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866884#M342370</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2023-03-28T21:57:59Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866887#M342373</link>
      <description>Thank you Reeza, this is very helpful.</description>
      <pubDate>Tue, 28 Mar 2023 22:42:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866887#M342373</guid>
      <dc:creator>CathyVI</dc:creator>
      <dc:date>2023-03-28T22:42:15Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866888#M342374</link>
      <description>&lt;P&gt;Using a BY statement with SET will INTERLEAVE the observations rather than just APPENDING them.&lt;/P&gt;
&lt;P&gt;So the result is a dataset that is sorted.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 22:47:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866888#M342374</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2023-03-28T22:47:06Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866894#M342379</link>
      <description>&lt;P&gt;So you get more VARIABLES with the MERGE data step since you are creating all of those INx variable instead of just the one DSN variable in the SET data step.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You get more OBSEVATIONS with the SET data step since you are interleaving the observations instead or merging them.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;So you now know that there are 23 million distinct patients.&amp;nbsp; And that one average they appear in about 3 different years.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 23:28:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/866894#M342379</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2023-03-28T23:28:53Z</dc:date>
    </item>
    <item>
      <title>Re: if observation in one dataset is in another dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/893528#M353012</link>
      <description>I found this interesting!</description>
      <pubDate>Mon, 11 Sep 2023 05:36:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/if-observation-in-one-dataset-is-in-another-dataset/m-p/893528#M353012</guid>
      <dc:creator>DrAbhijeetSafai</dc:creator>
      <dc:date>2023-09-11T05:36:37Z</dc:date>
    </item>
  </channel>
</rss>

