<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: &amp;quot;Moving Distinct&amp;quot; Counters in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265750#M52303</link>
    <description>&lt;P&gt;Hey Haikuo!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you so much!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;That's a really intuitive answer to the problem, and gets me exactly what I need. Thank you so much.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Jason&lt;/P&gt;</description>
    <pubDate>Fri, 22 Apr 2016 19:21:27 GMT</pubDate>
    <dc:creator>JS</dc:creator>
    <dc:date>2016-04-22T19:21:27Z</dc:date>
    <item>
      <title>"Moving Distinct" Counters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265694#M52282</link>
      <description>&lt;P&gt;Hello Friends!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm trying to create a "Moving Distinct Variable" counter by ID.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Basically, I have a dataset which contains transaction records which tracks user changes in their account profile. I want to build in a variable that tracks how many distinct emails the user has had an that specific point in time, and another variable that indicates whether or not that email was the first time we've seen it, or if they're switching back to an old one they had before.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;ID Date email_address&lt;/P&gt;&lt;P&gt;josh 1/1/2015 josh1@gmail.com&lt;BR /&gt;josh 1/2/2015 josh2@yahoo.com&lt;BR /&gt;josh 1/3/2015 josh3@yahoo.com&lt;BR /&gt;mary 1/4/2015 mary123@aol.com&lt;BR /&gt;mary 1/5/2015 mars@blah.com&lt;BR /&gt;josh 1/6/2015 josh1@gmail.com&lt;BR /&gt;josh 1/7/2015 josh2@yahoo.com&lt;BR /&gt;josh 1/8/2015 josh3@yahoo.com&lt;BR /&gt;mary 1/9/2015 mary123@aol.com&lt;BR /&gt;mary 1/10/2015 mars@blah.com&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;ID Date email_address distinct_emails_for_user&amp;nbsp;first_time_email_seen_with_user&lt;/P&gt;&lt;P&gt;josh 1/1/2015 josh1@gmail.com 1 YES&lt;BR /&gt;josh 1/2/2015 josh2@yahoo.com 2 YES&lt;BR /&gt;josh 1/3/2015 josh3@yahoo.com 3 YES&lt;BR /&gt;mary 1/4/2015 mary123@aol.com 1 YES&lt;BR /&gt;mary 1/5/2015 mars@blah.com 2 YES&lt;BR /&gt;josh 1/6/2015 josh1@gmail.com 3 NO&lt;BR /&gt;josh 1/7/2015 josh2@yahoo.com 3 NO&lt;BR /&gt;josh 1/8/2015 josh3@yahoo.com 3 NO&lt;BR /&gt;mary 1/9/2015 mary123@aol.com 2 NO&lt;BR /&gt;mary 1/10/2015 mars@blah.com 2 NO&lt;/P&gt;</description>
      <pubDate>Fri, 22 Apr 2016 17:07:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265694#M52282</guid>
      <dc:creator>JS</dc:creator>
      <dc:date>2016-04-22T17:07:33Z</dc:date>
    </item>
    <item>
      <title>Re: "Moving Distinct" Counters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265730#M52294</link>
      <description>&lt;P&gt;You sure can try it using Hash object, Proc SQL is just simpler to code:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	input ID$ Date:mmddyy10. email_address: $40.;
	format date mmddyy10.;
	cards;
josh 1/1/2015 josh1@gmail.com
josh 1/2/2015 josh2@yahoo.com
josh 1/3/2015 josh3@yahoo.com
mary 1/4/2015 mary123@aol.com
mary 1/5/2015 mars@blah.com
josh 1/6/2015 josh1@gmail.com
josh 1/7/2015 josh2@yahoo.com
josh 1/8/2015 josh3@yahoo.com
mary 1/9/2015 mary123@aol.com
mary 1/10/2015 mars@blah.com
;

proc sql;
	create table want as
		select *, (select count(distinct email_address) from have where id=a.id and date le a.date) as distinct_emails_for_user,
		(select 
			case 
				when sum(email_address=a.email_address)&amp;gt;1 then 'NO' 
				ELSE 'YES' 
			END  
		from have where id=a.id and date le a.date) 
			As first_time_email_seen_with_user
				fROM HAVE A
	;
QUIT;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 22 Apr 2016 18:17:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265730#M52294</guid>
      <dc:creator>Haikuo</dc:creator>
      <dc:date>2016-04-22T18:17:10Z</dc:date>
    </item>
    <item>
      <title>Re: "Moving Distinct" Counters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265750#M52303</link>
      <description>&lt;P&gt;Hey Haikuo!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you so much!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;That's a really intuitive answer to the problem, and gets me exactly what I need. Thank you so much.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Jason&lt;/P&gt;</description>
      <pubDate>Fri, 22 Apr 2016 19:21:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265750#M52303</guid>
      <dc:creator>JS</dc:creator>
      <dc:date>2016-04-22T19:21:27Z</dc:date>
    </item>
    <item>
      <title>Re: "Moving Distinct" Counters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265803#M52331</link>
      <description>&lt;PRE&gt;
data have;
	input ID $ Date:mmddyy10. email_address: $40.;
	format date mmddyy10.;
	cards;
josh 1/1/2015 josh1@gmail.com
josh 1/2/2015 josh2@yahoo.com
josh 1/3/2015 josh3@yahoo.com
mary 1/4/2015 mary123@aol.com
mary 1/5/2015 mars@blah.com
josh 1/6/2015 josh1@gmail.com
josh 1/7/2015 josh2@yahoo.com
josh 1/8/2015 josh3@yahoo.com
mary 1/9/2015 mary123@aol.com
mary 1/10/2015 mars@blah.com
;
run;
data want;
 if _n_ eq 1 then do;
  declare hash hh();
  hh.definekey('id');
  hh.definedata('count');
  hh.definedone();
  declare hash h();
  h.definekey('id','email_address');
  h.definedone();
 end;
 set have;
 length flag $ 8; 
 if h.find()=0 then do;
   hh.find();flag='NO';
 end;
 else do;
  flag='YES';
  if hh.find()=0 then count+1;
   else count=1;
 h.replace();hh.replace();
 end;
run;

&lt;/PRE&gt;</description>
      <pubDate>Sat, 23 Apr 2016 04:25:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/265803#M52331</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2016-04-23T04:25:32Z</dc:date>
    </item>
    <item>
      <title>Re: "Moving Distinct" Counters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/266106#M52405</link>
      <description>&lt;P&gt;Thanks again for the help.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The issue that I am running into at the moment is runtime.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The table that I am attempting to draw from has approximately 1.8M records. I pulled stopped the query early to see what progress was made after 1M (see below). At this rate, it would take (1.8M Records / 15 Records in 1 minutes= 120000 minutes, 120000 minutes/ 60 minutes / 24 hours) ~83 Days to complete.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Do you see any optimization opportunities here?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;JS&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;WARNING: The data set ACCT may be incomplete. When this step was stopped there were 15 observations and 25 variables.&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;NOTE: Compressing data set ACCT increased size by 100.00 percent. &lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;Compressed is 2 pages; un-compressed would require 1 pages.&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;WARNING: Data set ACCT was not replaced because this step was stopped.&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;NOTE: PROCEDURE SQL used (Total process time):&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;real time 1:00.26&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;cpu time 1:00.21&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 25 Apr 2016 16:56:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/266106#M52405</guid>
      <dc:creator>JS</dc:creator>
      <dc:date>2016-04-25T16:56:53Z</dc:date>
    </item>
    <item>
      <title>Re: "Moving Distinct" Counters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/266107#M52406</link>
      <description>&lt;P&gt;Thanks Xia Keshan.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Would this be preferred over the SQL counterpart? Do you see one better than the other?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;</description>
      <pubDate>Mon, 25 Apr 2016 16:57:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/266107#M52406</guid>
      <dc:creator>JS</dc:creator>
      <dc:date>2016-04-25T16:57:57Z</dc:date>
    </item>
    <item>
      <title>Re: "Moving Distinct" Counters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/266253#M52452</link>
      <description>Yeah. SQL is not good for Big Table due to calculated at very obs(record). If Table is small , both could be accepted.</description>
      <pubDate>Tue, 26 Apr 2016 01:13:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/quot-Moving-Distinct-quot-Counters/m-p/266253#M52452</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2016-04-26T01:13:23Z</dc:date>
    </item>
  </channel>
</rss>

