<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Deduping Columns in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234335#M42848</link>
    <description>&lt;P&gt;You are not telling why you want to do this.&lt;/P&gt;
&lt;P&gt;Having wide table structure often leads to awkward programming.&lt;/P&gt;
&lt;P&gt;Most scenarios&amp;nbsp;benefit when normalizing your data, in this case transposing&amp;nbsp;the other way, leaving you with ID, VAR_name, VAR_Value.&lt;/P&gt;</description>
    <pubDate>Thu, 12 Nov 2015 07:27:48 GMT</pubDate>
    <dc:creator>LinusH</dc:creator>
    <dc:date>2015-11-12T07:27:48Z</dc:date>
    <item>
      <title>Deduping Columns</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234211#M42813</link>
      <description>&lt;P&gt;Hi everyone,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I have this following data set, call table1:&amp;nbsp;&lt;/P&gt;
&lt;TABLE width="384"&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD width="64"&gt;ID&lt;/TD&gt;
&lt;TD width="64"&gt;VAR1&lt;/TD&gt;
&lt;TD width="64"&gt;VAR2&lt;/TD&gt;
&lt;TD width="64"&gt;VAR3&lt;/TD&gt;
&lt;TD width="64"&gt;VAR4&lt;/TD&gt;
&lt;TD width="64"&gt;VAR5&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;t1&lt;/TD&gt;
&lt;TD&gt;t1_1&lt;/TD&gt;
&lt;TD&gt;t1_2&lt;/TD&gt;
&lt;TD&gt;t1_3&lt;/TD&gt;
&lt;TD&gt;t1_4&lt;/TD&gt;
&lt;TD&gt;t1_5&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;t1&lt;/TD&gt;
&lt;TD&gt;t1_4&lt;/TD&gt;
&lt;TD&gt;t1_5&lt;/TD&gt;
&lt;TD&gt;t1_6&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;t1_7&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;t2&lt;/TD&gt;
&lt;TD&gt;t2_1&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;t2_2&lt;/TD&gt;
&lt;TD&gt;t2_3&lt;/TD&gt;
&lt;TD&gt;t2_4&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;t3&lt;/TD&gt;
&lt;TD&gt;t3_1&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;What I want:&amp;nbsp;&lt;/P&gt;
&lt;TABLE width="512"&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD width="64"&gt;ID&lt;/TD&gt;
&lt;TD width="64"&gt;VAR1&lt;/TD&gt;
&lt;TD width="64"&gt;VAR2&lt;/TD&gt;
&lt;TD width="64"&gt;VAR3&lt;/TD&gt;
&lt;TD width="64"&gt;VAR4&lt;/TD&gt;
&lt;TD width="64"&gt;VAR5&lt;/TD&gt;
&lt;TD width="64"&gt;VAR6&lt;/TD&gt;
&lt;TD width="64"&gt;VAR7&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;t1&lt;/TD&gt;
&lt;TD&gt;t1_1&lt;/TD&gt;
&lt;TD&gt;t1_2&lt;/TD&gt;
&lt;TD&gt;t1_3&lt;/TD&gt;
&lt;TD&gt;t1_4&lt;/TD&gt;
&lt;TD&gt;t1_5&lt;/TD&gt;
&lt;TD&gt;t1_6&lt;/TD&gt;
&lt;TD&gt;t1_7&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;t2&lt;/TD&gt;
&lt;TD&gt;t2_1&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;t2_2&lt;/TD&gt;
&lt;TD&gt;t2_3&lt;/TD&gt;
&lt;TD&gt;t2_4&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;t3&lt;/TD&gt;
&lt;TD&gt;t3_1&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Essentially I want to add all the&amp;nbsp;&lt;STRONG&gt;tx_y&amp;nbsp;&lt;/STRONG&gt;together, match it to respective&amp;nbsp;&lt;EM&gt;&lt;STRONG&gt;tx&lt;/STRONG&gt;&lt;/EM&gt; and then remove any duplicates in that row.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I initially transpose the data step:&lt;EM&gt;&amp;nbsp;&lt;/EM&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;I&gt;proc transpose data=Table1&lt;/I&gt;&lt;/P&gt;
&lt;P&gt;&lt;I&gt;out=Table1_T;&lt;BR /&gt;by ID;&lt;BR /&gt;var &lt;SPAN&gt;VAR1&lt;/SPAN&gt;;&lt;BR /&gt;run;&lt;/I&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;But this seems like a very lengthy and ineffcient way of coding.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Can you recommend any other efficient technique to essentially dedup columns ?&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 11 Nov 2015 15:24:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234211#M42813</guid>
      <dc:creator>mjheever</dc:creator>
      <dc:date>2015-11-11T15:24:36Z</dc:date>
    </item>
    <item>
      <title>Re: Deduping Columns</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234219#M42816</link>
      <description>&lt;P&gt;Well, your probelm is that&amp;nbsp;your data isn't structured in a good way, hence your finding it difficult to work with it. &amp;nbsp;In these cases its a good idea to keep your data normalised until you actually need a transposed dataset. &amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;data have;
  infile datalines dlm=" " missover;
  input ID $	VAR1 $ VAR2 $ 	VAR3 $	VAR4 $	VAR5 $;
datalines;
t1	t1_1	t1_2	t1_3	t1_4	t1_5
t1	t1_4	t1_5	t1_6	 t1_7	 
t2	t2_1 	t2_2	t2_3	t2_4	 
t3	t3_1	 
;
run;
data want (keep=id result);
  set have (drop=id);
  length id $2;
  array var{5};
  do i=1 to 5;
    if var{i} ne "" then do;
      id=scan(var{i},1,"_");
      result=input(scan(var{i},2,"_"),1.);
      output;
    end;
  end;
run;
proc sort data=want nodupkey;
  by id result;
run;&lt;/PRE&gt;
&lt;P&gt;You will see that i split id and result out, as this is another bad setup, keep individual elements of data in separate variables, until they actually need to be one. &amp;nbsp;Again it makes working with the data much easier.&lt;/P&gt;</description>
      <pubDate>Wed, 11 Nov 2015 16:09:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234219#M42816</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2015-11-11T16:09:44Z</dc:date>
    </item>
    <item>
      <title>Re: Deduping Columns</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234249#M42820</link>
      <description>&lt;P&gt;Similar to&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/45151"&gt;@RW9﻿&lt;/a&gt;'s suggestion, here is one to get the exact outcome you asked for. Please note, the dimension of temporary array is arbituary (eg. 20), make sure it is large enough to accomodate the distinct count of your values per id group. If sought for a full dynamic solution, Hash can be implemented. The following code assumes incoming data is presorted by 'id'.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;data h1;
do until (last.id);
set have;
by id notsorted;
array _t(20) $ 5 _temporary_;
array var var:;
do over var;
if var not in _t then do; i+1;_t(i)=var;end;
end;
end;
call sortc(of _t(*));
do i=1 to dim(_t);
_var=_t(i);
put _var=;
if not missing(_var) then output;
end;
call missing(of _t(*));
i=0;
keep id _var;
run;

proc transpose data=h1 out=want(drop=_name_) prefix=VAR;
by id;
var _var;
run;&lt;/PRE&gt;</description>
      <pubDate>Wed, 11 Nov 2015 17:55:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234249#M42820</guid>
      <dc:creator>Haikuo</dc:creator>
      <dc:date>2015-11-11T17:55:44Z</dc:date>
    </item>
    <item>
      <title>Re: Deduping Columns</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234270#M42828</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
   do until(last.id);
   set have;
   by id notsorted;
   retain max;
   array var var:;
   array temp(*) $ temp1-temp10;
   if first.id then k=1;
   do over var;
   if not missing(var) and whichc(var,of temp(*))=0 then do;
      temp(k)=var;
      k+1;
   end;
   end;
   k=cmiss(of temp(*))+1;
   if last.id then do;
     num=whichc('',of temp(*)); output;
	 call missing(of temp(*));
   end;
   end;
   max=max(max,num);
   call symputx('max',max);
   drop temp&amp;amp;max-temp10 k var: max num;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 11 Nov 2015 19:28:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234270#M42828</guid>
      <dc:creator>slchen</dc:creator>
      <dc:date>2015-11-11T19:28:51Z</dc:date>
    </item>
    <item>
      <title>Re: Deduping Columns</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234335#M42848</link>
      <description>&lt;P&gt;You are not telling why you want to do this.&lt;/P&gt;
&lt;P&gt;Having wide table structure often leads to awkward programming.&lt;/P&gt;
&lt;P&gt;Most scenarios&amp;nbsp;benefit when normalizing your data, in this case transposing&amp;nbsp;the other way, leaving you with ID, VAR_name, VAR_Value.&lt;/P&gt;</description>
      <pubDate>Thu, 12 Nov 2015 07:27:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deduping-Columns/m-p/234335#M42848</guid>
      <dc:creator>LinusH</dc:creator>
      <dc:date>2015-11-12T07:27:48Z</dc:date>
    </item>
  </channel>
</rss>

