<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to assign groups in larger proc sql without creating new table in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-assign-groups-in-larger-proc-sql-without-creating-new/m-p/775447#M246496</link>
    <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/12447"&gt;@Patrick&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I modified the code a little bit and I think I have most of it figured out except the fact that I want to obtain 1&amp;nbsp; row for each store_id, zip combination that shows STORE_ID, zip_cd, no_transactions (summed), no_customers (summed), no_customers_race (summed) open/close_year, min/max_transaction_date.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create view store_open_close as
  select 
    l.store_id,
    l.zip_cd,
    min(l.year) as open_year,
    max(l.year) as close_year,
    min(r.transaction_date) as min_transaction_date format=date9.,
    max(r.transaction_date) as max_transaction_date format=date9.
  from 
    have0 l
    left join
    have1 r
      on l.store_id=r.store_id and l.year=year(r.transaction_date)
  group by l.store_id, l.zip_cd
  ;
quit;


proc sql;
	create table long as
	select
	l.*,
	count(distinct r.transaction_id) as no_transactions,  /*issue here: I think it should be count(distinct r.transaction_id)*/
	count(distinct r.customer_id) as no_customers_overall,&lt;BR /&gt;/*these customer race are wrong since I don't know how to count distinct id and group race*/
	count(case when (t.customer_race='white') then t.customer_race end) as no_customers_white,
	count(case when (t.customer_race='black') then t.customer_race end) as no_customers_black,
	count(case when (t.customer_race='latino') then t.customer_race end) as no_customers_latino,
	count(case when (t.customer_race='other') then t.customer_race end) as no_customers_other,
	s.open_year,
	s.close_year,
	s.min_transaction_date,
	s.max_transaction_date
	from have0 l

	left join
	have1 r
	on l.store_id=r.store_id and l.year=year(r.transaction_date)

	left join
	have2 t
	on r.customer_id=t.customer_id and l.year=year(r.transaction_date)

	left join 
	store_open_close s
	on l.store_id=s.store_id and l.zip_cd=s.zip_cd

	group by 
	l.store_id, l.zip_cd, s.open_year, s.close_year, s.min_transaction_date, s.max_transaction_date, l.year
	;
quit;&lt;BR /&gt;&lt;BR /&gt;/*Count totals*/
proc sql;&lt;BR /&gt;create table new as &lt;BR /&gt;select store_id, zip_cd, open_year, close_year, min_transaction_date, max_transaction_date,&lt;BR /&gt;sum(no_transactions) as total_transactions,&lt;BR /&gt;sum(no_customers_overall) as total_customers,&lt;BR /&gt;sum(no_customers_white) as total_customers_white,&lt;BR /&gt;sum(no_customers_black) as total_customers_black,&lt;BR /&gt;sum(no_customers_latino) as total_customers_latino,&lt;BR /&gt;sum(no_customers_other) as total_customers_other&lt;BR /&gt;from long&lt;BR /&gt;group by store_id, zip_cd, open_year, close_year, min_transaction_date, max_transaction_date;&lt;BR /&gt;quit;&lt;BR /&gt;
&lt;BR /&gt;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 21 Oct 2021 00:15:32 GMT</pubDate>
    <dc:creator>A_Swoosh</dc:creator>
    <dc:date>2021-10-21T00:15:32Z</dc:date>
    <item>
      <title>How to assign groups in larger proc sql without creating new table</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-assign-groups-in-larger-proc-sql-without-creating-new/m-p/775303#M246442</link>
      <description>&lt;P&gt;Hi all,&lt;/P&gt;
&lt;P&gt;I'm working off a previous question that I asked the SAS community before. I have 3 datasets now where I'm attempting to add pieces of information from each of the 3 datasets into 1 larger dataset.&lt;/P&gt;
&lt;P&gt;Currently, I have:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*STORE INFORMATION*/&lt;BR /&gt;data have0;
  infile cards truncover expandtabs;
  input STORE_ID $ zip_cd year;
  format zip_cd z5.;
  cards;
A0001 99993 2010 
A0001 99993 2011 
A0001 99994 2012 
A0001 99994 2013 
A0001 99994 2014 
A0001 99995 2015 
A0002 33999 2011 
A0002 33999 2012 
A0002 33999 2013 
A0002 33995 2014 
A0002 33993 2015 
A0003 33999 2010 
A0003 33999 2011 
A0003 33999 2012 
A0003 33999 2013 
A0003 33999 2014 
A0003 33999 2015 
A0004 33999 2014 
A0004 33999 2015 
A0005 33999 2011 
A0005 33999 2012 
A0005 33995 2013 
;
&lt;BR /&gt;/*TRANSACTION INFORMATION*/
data have1;
  infile cards truncover expandtabs;
  input STORE_ID $ customer_id $ transaction_id $ transaction_date :date9.;
  format transaction_date date8.;
  cards;
A0001 aa 1 22jan2010  
A0001 ab 2 11Oct2010  
A0001 cc 3 16jun2011  
A0001 dd 4 19apr2012  
A0001 ee 5 10feb2013  
A0001 ef 6 09nov2014  
A0001 ff 7 04dec2014  
A0001 ff 8 07dec2014  
A0001 gg 9 22Oct2015  
A0001 ds 10 30nov2015  
A0002 ad 1 11jun2011  
A0002 aq 2 11apr2014  
A0002 qr 3 11feb2015  
A0003 er 1 09nov2010  
A0003 hh 2 04dec2011  
A0003 hh 3 22jan2012  
A0003 hd 4 11Oct2012  
A0003 sd 5 16jun2013  
A0003 cv 6 19apr2014  
A0003 nb 7 10feb2015  
A0003 wr 8 09nov2015  
A0003 yt 9 04dec2015  
A0003 ui 10 04dec2015  
A0004 ol 1 09nov2014  
A0004 rr 2 04dec2014  
A0004 gh 3 04dec2015  
;
&lt;BR /&gt;/*CUSTOMER INFORMATION*/
data have2;
  infile cards truncover expandtabs;
  input customer_id $ customer_race $ ;
  cards;
aa white
ab black
cc latino
dd other
ee white
ef white
ff white
gg black
ds black
ad black 
aq latino
qr latino
er latino
hh other
hd latino
sd other
cv other
nb other
wr white
yt white
ui white
ol white
rr white
gh black
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;With the assistance of someone in the community, I was trying to create a dataset that combined information from the first 2 datasets to create extra rows for stores that opened/closed during my study period and count the number of transactions that occurred during the period they were opened as shown below:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create view store_open_close as
  select 
    l.store_id,
    l.zip_cd,
    min(l.year) as open_year,
    max(l.year) as close_year,
    min(r.transaction_date) as min_transaction_date format=date9.,
    max(r.transaction_date) as max_transaction_date format=date9.
  from 
    have0 l
    left join
    have1 r
      on l.store_id=r.store_id and l.year=year(r.transaction_date)
  group by l.store_id, l.zip_cd
  ;
quit;


proc sql;
	create table long as
	select
	l.*,
	count(r.transaction_id) as no_transactions,  /*issue here: I think it should be count(distinct r.transaction_id)*/
	count(distinct r.customer_id) as no_customers_overall,
	s.open_year,
	s.close_year,
	s.min_transaction_date,
	s.max_transaction_date
	from have0 l

	left join
	have1 r
	on l.store_id=r.store_id and l.year=year(r.transaction_date)

	left join
	have2 t
	on r.customer_id=t.customer_id and l.year=year(r.transaction_date)

	left join 
	store_open_close s
	on l.store_id=s.store_id and l.zip_cd=s.zip_cd

	group by 
	l.store_id, l.zip_cd, s.open_year, s.close_year, s.min_transaction_date, s.max_transaction_date, l.year
	;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I encounter two issues though:&lt;/P&gt;
&lt;P&gt;1. I think counting the number of transactions is not done correctly--I commented where I think it should be count(distinct r.transaction_id) to get the total transactions while open?&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Ideally I should have 1 row for each store_id, zip combination that shows STORE_ID, zip_cd, no_transactions (summed), open/close_year, min/max_transaction_date&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;2. I want to now combine dataset 3 during this piece to also group the customer_id by race and count how many there were as new variables (e.g., no_customer_white, no_customer_black, and so on...) within this proc sql.&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;I started to do it by counting number of customers overall, but I want to also add additional columns to report unique customers by race too. Is that possible?&lt;/LI&gt;
&lt;LI&gt;&lt;STRONG&gt;I also get a NOTE: Invalid (or missing) arguments to the YEAR function have caused the function to return a missing value.&lt;/STRONG&gt;&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;Any help would be appreciated. Thank you!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 20 Oct 2021 00:41:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-assign-groups-in-larger-proc-sql-without-creating-new/m-p/775303#M246442</guid>
      <dc:creator>A_Swoosh</dc:creator>
      <dc:date>2021-10-20T00:41:40Z</dc:date>
    </item>
    <item>
      <title>Re: How to assign groups in larger proc sql without creating new table</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-assign-groups-in-larger-proc-sql-without-creating-new/m-p/775447#M246496</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/12447"&gt;@Patrick&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I modified the code a little bit and I think I have most of it figured out except the fact that I want to obtain 1&amp;nbsp; row for each store_id, zip combination that shows STORE_ID, zip_cd, no_transactions (summed), no_customers (summed), no_customers_race (summed) open/close_year, min/max_transaction_date.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create view store_open_close as
  select 
    l.store_id,
    l.zip_cd,
    min(l.year) as open_year,
    max(l.year) as close_year,
    min(r.transaction_date) as min_transaction_date format=date9.,
    max(r.transaction_date) as max_transaction_date format=date9.
  from 
    have0 l
    left join
    have1 r
      on l.store_id=r.store_id and l.year=year(r.transaction_date)
  group by l.store_id, l.zip_cd
  ;
quit;


proc sql;
	create table long as
	select
	l.*,
	count(distinct r.transaction_id) as no_transactions,  /*issue here: I think it should be count(distinct r.transaction_id)*/
	count(distinct r.customer_id) as no_customers_overall,&lt;BR /&gt;/*these customer race are wrong since I don't know how to count distinct id and group race*/
	count(case when (t.customer_race='white') then t.customer_race end) as no_customers_white,
	count(case when (t.customer_race='black') then t.customer_race end) as no_customers_black,
	count(case when (t.customer_race='latino') then t.customer_race end) as no_customers_latino,
	count(case when (t.customer_race='other') then t.customer_race end) as no_customers_other,
	s.open_year,
	s.close_year,
	s.min_transaction_date,
	s.max_transaction_date
	from have0 l

	left join
	have1 r
	on l.store_id=r.store_id and l.year=year(r.transaction_date)

	left join
	have2 t
	on r.customer_id=t.customer_id and l.year=year(r.transaction_date)

	left join 
	store_open_close s
	on l.store_id=s.store_id and l.zip_cd=s.zip_cd

	group by 
	l.store_id, l.zip_cd, s.open_year, s.close_year, s.min_transaction_date, s.max_transaction_date, l.year
	;
quit;&lt;BR /&gt;&lt;BR /&gt;/*Count totals*/
proc sql;&lt;BR /&gt;create table new as &lt;BR /&gt;select store_id, zip_cd, open_year, close_year, min_transaction_date, max_transaction_date,&lt;BR /&gt;sum(no_transactions) as total_transactions,&lt;BR /&gt;sum(no_customers_overall) as total_customers,&lt;BR /&gt;sum(no_customers_white) as total_customers_white,&lt;BR /&gt;sum(no_customers_black) as total_customers_black,&lt;BR /&gt;sum(no_customers_latino) as total_customers_latino,&lt;BR /&gt;sum(no_customers_other) as total_customers_other&lt;BR /&gt;from long&lt;BR /&gt;group by store_id, zip_cd, open_year, close_year, min_transaction_date, max_transaction_date;&lt;BR /&gt;quit;&lt;BR /&gt;
&lt;BR /&gt;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 21 Oct 2021 00:15:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-assign-groups-in-larger-proc-sql-without-creating-new/m-p/775447#M246496</guid>
      <dc:creator>A_Swoosh</dc:creator>
      <dc:date>2021-10-21T00:15:32Z</dc:date>
    </item>
  </channel>
</rss>

