<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Urgent Help in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336262#M62985</link>
    <description>&lt;P&gt;Technically speaking, you don't have to dedup where you've got multiple '27's in a claim - you'll get a warning about a merge statement with multiple instances of by variables. But in &lt;EM&gt;this&lt;/EM&gt; case it doesn't matter.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you want to, you can do it in a data step (with or without a view - it's about the same efficiency). I randomly generated 10 million claims and loss codes, and it still ran in under two seconds:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data claim_27v / view=claim_27v;
set claims;
by claim_number;
where loss_code = '27';
if first.claim_number;
keep claim_number;
run;

data want;
merge claims(keep=claim_number)
      claim_27v(in=in_loss);
by claim_number;
if not in_loss;
if first.claim_number;
keep claim_number;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Mon, 27 Feb 2017 16:58:07 GMT</pubDate>
    <dc:creator>LaurieF</dc:creator>
    <dc:date>2017-02-27T16:58:07Z</dc:date>
    <item>
      <title>Urgent Help</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336117#M62967</link>
      <description>&lt;P&gt;Hi i have&amp;nbsp;list of claim numbers each with different loss codes like 27,27C,27Y,27O,27P . Now my problem is i watnt to get the claim numbers which have the loss codes except 27. Hope it make sense.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;STRONG&gt;Claim Numers&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Loss codes&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0001&lt;/TD&gt;&lt;TD&gt;27&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0002&lt;/TD&gt;&lt;TD&gt;27C&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0002&lt;/TD&gt;&lt;TD&gt;27&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0003&lt;/TD&gt;&lt;TD&gt;27Y&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0003&lt;/TD&gt;&lt;TD&gt;27C&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0003&lt;/TD&gt;&lt;TD&gt;27P&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0004&lt;/TD&gt;&lt;TD&gt;27O&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0004&lt;/TD&gt;&lt;TD&gt;27Y&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0004&lt;/TD&gt;&lt;TD&gt;27&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0004&lt;/TD&gt;&lt;TD&gt;27P&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0005&lt;/TD&gt;&lt;TD&gt;27P&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0005&lt;/TD&gt;&lt;TD&gt;27O&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0005&lt;/TD&gt;&lt;TD&gt;27C&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0005&lt;/TD&gt;&lt;TD&gt;27&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0006&lt;/TD&gt;&lt;TD&gt;27Y&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0006&lt;/TD&gt;&lt;TD&gt;27C&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0007&lt;/TD&gt;&lt;TD&gt;27O&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0007&lt;/TD&gt;&lt;TD&gt;27Y&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0008&lt;/TD&gt;&lt;TD&gt;27O&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;CML0009&lt;/TD&gt;&lt;TD&gt;27P&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Below&amp;nbsp;claim numbers should be displayed.&amp;nbsp;&lt;/P&gt;&lt;TABLE border="0" cellspacing="0" cellpadding="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#000000" face="Calibri" size="3"&gt;CML0003&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#000000" face="Calibri" size="3"&gt;CML0006&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#000000" face="Calibri" size="3"&gt;CML0007&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#000000" face="Calibri" size="3"&gt;CML0008&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;FONT color="#000000" face="Calibri" size="3"&gt;CML0009&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks in advanceee&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 08:38:01 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336117#M62967</guid>
      <dc:creator>Reddi</dc:creator>
      <dc:date>2017-02-27T08:38:01Z</dc:date>
    </item>
    <item>
      <title>Re: Urgent Help</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336121#M62968</link>
      <description>&lt;P&gt;By merging the dataset in twice, but with a&amp;nbsp;&lt;EM&gt;where&lt;/EM&gt; clause on the claims you don't want,&amp;nbsp;&lt;EM&gt;and&lt;/EM&gt; by doing a&amp;nbsp;&lt;EM&gt;first.&lt;/EM&gt; on claim_number, this works (as long as '27's will be unique per claim number):&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data claims;
infile cards dsd dlm='09'x;
attrib claim_number length=$ 7;
attrib loss_code length=$ 3;
input claim_number
      loss_code;
cards;
CML0001	27
CML0002	27C
CML0002	27
CML0003	27Y
CML0003	27C
CML0003	27P
CML0004	27O
CML0004	27Y
CML0004	27
CML0004	27P
CML0005	27P
CML0005	27O
CML0005	27C
CML0005	27
CML0006	27Y
CML0006	27C
CML0007	27O
CML0007	27Y
CML0008	27O
CML0009	27P
;
run;

data want;
merge claims(keep=claim_number)
      claims(in=in_loss where=(loss_code = '27'));
by claim_number;
if not in_loss;
if first.claim_number;
keep claim_number;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Is that all you want?&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 08:56:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336121#M62968</guid>
      <dc:creator>LaurieF</dc:creator>
      <dc:date>2017-02-27T08:56:35Z</dc:date>
    </item>
    <item>
      <title>Re: Urgent Help</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336128#M62969</link>
      <description>&lt;P&gt;That's Right on Laurie &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt; Thanks . I have duplicate claim numbers with same loss codes but i can perform the proc sort by claim number and loss codes and execute the same program to get the result.&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 09:33:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336128#M62969</guid>
      <dc:creator>Reddi</dc:creator>
      <dc:date>2017-02-27T09:33:36Z</dc:date>
    </item>
    <item>
      <title>Re: Urgent Help</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336147#M62970</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data claims;
infile cards dsd dlm='09'x;
attrib claim_number length=$ 7;
attrib loss_code length=$ 3;
input claim_number
      loss_code;
cards;
CML0001	27
CML0002	27C
CML0002	27
CML0003	27Y
CML0003	27C
CML0003	27P
CML0004	27O
CML0004	27Y
CML0004	27
CML0004	27P
CML0005	27P
CML0005	27O
CML0005	27C
CML0005	27
CML0006	27Y
CML0006	27C
CML0007	27O
CML0007	27Y
CML0008	27O
CML0009	27P
;
run;
proc sql;
select distinct claim_number
 from claims
  group by claim_number
   having sum(loss_code='27')=0;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 27 Feb 2017 10:44:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336147#M62970</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-02-27T10:44:37Z</dc:date>
    </item>
    <item>
      <title>Re: Urgent Help</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336253#M62982</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;If you have a very large table this may be the fastest method.
I think it reuses the cache created by the first 'set'?

data want;
  retain gotone 0;
  do until (last.claim_number);
     set claims;
     by claim_number;
     if loss_code='27' then gotone=1;
  end;
  put gotone;
  do until (last.claim_number);
     set claims;
     by claim_number;
     if last.claim_number and gotone =0 then output;
  end;
  gotone=0;
run;quit;


&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 27 Feb 2017 16:17:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336253#M62982</guid>
      <dc:creator>rogerjdeangelis</dc:creator>
      <dc:date>2017-02-27T16:17:05Z</dc:date>
    </item>
    <item>
      <title>Re: Urgent Help</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336262#M62985</link>
      <description>&lt;P&gt;Technically speaking, you don't have to dedup where you've got multiple '27's in a claim - you'll get a warning about a merge statement with multiple instances of by variables. But in &lt;EM&gt;this&lt;/EM&gt; case it doesn't matter.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you want to, you can do it in a data step (with or without a view - it's about the same efficiency). I randomly generated 10 million claims and loss codes, and it still ran in under two seconds:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data claim_27v / view=claim_27v;
set claims;
by claim_number;
where loss_code = '27';
if first.claim_number;
keep claim_number;
run;

data want;
merge claims(keep=claim_number)
      claim_27v(in=in_loss);
by claim_number;
if not in_loss;
if first.claim_number;
keep claim_number;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 27 Feb 2017 16:58:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336262#M62985</guid>
      <dc:creator>LaurieF</dc:creator>
      <dc:date>2017-02-27T16:58:07Z</dc:date>
    </item>
    <item>
      <title>Re: Urgent Help</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336263#M62986</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/42345"&gt;@rogerjdeangelis&lt;/a&gt;&amp;nbsp;I ran the simple merge against your code over 10 million rows - there's nothing in it. Effectively by reusing the dataset in a merge, the cache is reused as well.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You may like to take the&amp;nbsp;&lt;EM&gt;put&lt;/EM&gt; statement out though - I think you might have log problems!&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 17:02:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336263#M62986</guid>
      <dc:creator>LaurieF</dc:creator>
      <dc:date>2017-02-27T17:02:46Z</dc:date>
    </item>
    <item>
      <title>Re: Urgent Help</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336280#M62991</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;It worked for me, see log below.

The merge and the DOW (two set statements)
had identical times.

I ran 100,000,000. My $600 workstation is too
fast for 10,000,000.


data claims;
input claim_number $ loss_code $;
cards4;
CML0001 27
CML0002 27C
CML0002 27
CML0003 27Y
CML0003 27C
CML0003 27P
CML0004 27O
CML0004 27Y
CML0004 27
CML0004 27P
CML0005 27P
CML0005 27O
CML0005 27C
CML0005 27
CML0006 27Y
CML0006 27C
CML0007 27O
CML0007 27Y
CML0008 27O
CML0009 27P
;;;;
run;


data want(keep=claim_number);
  retain gotone 0;
  do until (last.claim_number);
     set claims;
     by claim_number;
     if loss_code='27' then gotone=1;
  end;
  do until (last.claim_number);
     set claims;
     by claim_number;
     if last.claim_number and gotone =0 then output;
  end;
  gotone=0;
run;quit;

proc print data=want;
run;quit;


456   data claims;
457   input claim_number $ loss_code $;
458   cards4;

NOTE: The data set WORK.CLAIMS has 20 observations and 2 variables.
NOTE: DATA statement used (Total process time):
      real time           0.01 seconds
      cpu time            0.00 seconds


479   ;;;;
480   run;
481   data want(keep=claim_number);
482     retain gotone 0;
483     do until (last.claim_number);
484        set claims;
485        by claim_number;
486        if loss_code='27' then gotone=1;
487     end;
488     do until (last.claim_number);
489        set claims;
490        by claim_number;
491        if last.claim_number and gotone =0 then output;
492     end;
493     gotone=0;
494   run;

NOTE: There were 20 observations read from the data set WORK.CLAIMS.
NOTE: There were 20 observations read from the data set WORK.CLAIMS.
NOTE: The data set WORK.WANT has 5 observations and 1 variables.
NOTE: DATA statement used (Total process time):
      real time           0.00 seconds
      cpu time            0.01 seconds


494 !     quit;
495   proc print data=want;
496   run;

NOTE: There were 5 observations read from the data set WORK.WANT.
NOTE: PROCEDURE PRINT used (Total process time):
      real time           0.02 seconds
      cpu time            0.03 seconds

496 !     quit;


Up to 40 obs from WORK.WANT total obs=5

       CLAIM_
Obs    NUMBER

 1     CML0003
 2     CML0006
 3     CML0007
 4     CML0008
 5     CML0009



Benchmark

data claims100mm;
  set claims;
  do rec=1 to 5000000;
    output;
  end;
run;quit;


data want;
merge claims100mm(keep=claim_number)
      claims100mm(in=in_loss where=(loss_code = '27'));
by claim_number;
if not in_loss;
if first.claim_number;
keep claim_number;
run;


542   data want;
543   merge claims100mm(keep=claim_number)
544         claims100mm(in=in_loss where=(loss_code = '27'));
545   by claim_number;
546   if not in_loss;
547   if first.claim_number;
548   keep claim_number;
549   run;

NOTE: MERGE statement has more than one data set with repeats of BY values.
NOTE: There were 100000000 observations read from the data set WORK.CLAIMS100MM.
NOTE: There were 20000000 observations read from the data set WORK.CLAIMS100MM.
      WHERE loss_code='27';
NOTE: The data set WORK.WANT has 5 observations and 1 variables.
NOTE: DATA statement used (Total process time):
      real time           27.63 seconds
      cpu time            27.62 seconds




data want(keep=claim_number);
  retain gotone 0;
  do until (last.claim_number);
     set claims100mm;
     by claim_number;
     if loss_code='27' then gotone=1;
  end;
  do until (last.claim_number);
     set claims100mm;
     by claim_number;
     if last.claim_number and gotone =0 then output;
  end;
  gotone=0;
run;



NOTE: There were 100000000 observations read from the data set WORK.CLAIMS100MM.
NOTE: There were 100000000 observations read from the data set WORK.CLAIMS100MM.
NOTE: The data set WORK.WANT has 5 observations and 1 variables.
NOTE: DATA statement used (Total process time):
      real time           27.08 seconds
      cpu time            27.00 seconds


&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 27 Feb 2017 17:58:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Urgent-Help/m-p/336280#M62991</guid>
      <dc:creator>rogerjdeangelis</dc:creator>
      <dc:date>2017-02-27T17:58:36Z</dc:date>
    </item>
  </channel>
</rss>

