<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Create new dummy variables from multiple columns of character variables in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686189#M208182</link>
    <description>How many different categories do you have? If it's a few it's easy enough to manually code them but if it needs to be dynamic that's more coding...and likely more stable if this is ever going to be run in the future.</description>
    <pubDate>Wed, 23 Sep 2020 19:37:36 GMT</pubDate>
    <dc:creator>Reeza</dc:creator>
    <dc:date>2020-09-23T19:37:36Z</dc:date>
    <item>
      <title>Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686185#M208178</link>
      <description>&lt;P&gt;Hi, I'm trying to create a dummy variable for each character variable.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data have;&lt;/P&gt;&lt;P&gt;StudyID &amp;nbsp; comorbidity1 comorbidity2 comorbidity3 comorbidty4&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; COPD &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;HTN &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;DM &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NONE&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;2 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; HTN &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;COPD &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; DM &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; MENTAL_ILLNESS&lt;/P&gt;&lt;P&gt;data want;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; StudyID &amp;nbsp; &amp;nbsp; &amp;nbsp;COPD &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; HTN &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; DM &amp;nbsp; &amp;nbsp; &amp;nbsp; MENTAL_ILLNESS&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 0&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1&lt;/P&gt;</description>
      <pubDate>Wed, 23 Sep 2020 19:31:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686185#M208178</guid>
      <dc:creator>stancemcgraw</dc:creator>
      <dc:date>2020-09-23T19:31:51Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686188#M208181</link>
      <description>&lt;P&gt;Do not create dummy variables yourself. SAS has already done this for you in many procedures, so if you are trying to create an analysis or statistical model involving character variables, then the CLASS statement in most modelling procedures are all that you need.&lt;/P&gt;</description>
      <pubDate>Wed, 23 Sep 2020 19:39:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686188#M208181</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2020-09-23T19:39:14Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686189#M208182</link>
      <description>How many different categories do you have? If it's a few it's easy enough to manually code them but if it needs to be dynamic that's more coding...and likely more stable if this is ever going to be run in the future.</description>
      <pubDate>Wed, 23 Sep 2020 19:37:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686189#M208182</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2020-09-23T19:37:36Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686192#M208184</link>
      <description>Not the way this data is structured - it needs to be restructured regardless here to work properly in a statistical model.</description>
      <pubDate>Wed, 23 Sep 2020 19:39:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686192#M208184</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2020-09-23T19:39:38Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686222#M208195</link>
      <description>&lt;P&gt;I would solve the problem this way:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA	Have	(DROP=Comorbidity)	
		Columns	(KEEP=Comorbidity)
		;
	DROP	_:;
	LENGTH	StudyID			$8
			Comorbidity1	$32
			Comorbidity2	$32
			Comorbidity3	$32
			Comorbidity4	$32
			;
	INFILE	Datalines		MISSOVER;
	INPUT	StudyID			$
			Comorbidity1	$
			Comorbidity2	$
			Comorbidity3	$
			Comorbidity4	$
			;
	ARRAY	Comorbidities	[4]	Comorbidity1 - Comorbidity4;
	DO	_i			=	1	TO	4;
		Comorbidity	=	Comorbidities[_i];
		IF	UPCASE(Comorbidity)	^=	'NONE'	AND
			NOT	MISSING(Comorbidity)		THEN
			OUTPUT	Columns;
	END;
	OUTPUT	Have;
	DELETE;
DATALINES;
1           COPD              HTN              DM         NONE
2           HTN               COPD             DM         MENTAL_ILLNESS
3           DM
4           MENTAL_ILLNESS    NONE
5           DM                MENTAL_ILLNESS
;
RUN;

PROC	SORT	DATA=Columns	NODUPKEY;
	BY	Comorbidity;
RUN;

DATA	_NULL_;
	SET	Columns	NOBS=Nbr_of_Columns;
	IF	_N_	=	1	THEN
		CALL	SYMPUTX('Nbr_of_Columns', Nbr_of_Columns, 'G');
	CALL	SYMPUTX(CATS('Column', PUT(_N_, 3.)), Comorbidity, 'G');
RUN;

DATA	Want;
	SET	Have;
	DROP	Comorbidity1 - Comorbidity&amp;amp;Nbr_of_Columns	j;
	ARRAY	Comorbidities	[*]	Comorbidity1 - Comorbidity&amp;amp;Nbr_of_Columns;

	%MACRO	Init_Column_Values;
		%LOCAL	i;
		%DO	i	=	1	%TO	&amp;amp;Nbr_of_Columns;
			&amp;amp;&amp;amp;Column&amp;amp;i		=	0;
		%END;
	%MEND;
	%Init_Column_Values;

	DO	j	=	1	TO	&amp;amp;Nbr_of_Columns;
		SELECT(Comorbidities[j]);
			%MACRO	Set_Column_Values;
				%LOCAL	i;
				%DO	i	=	1	%TO	&amp;amp;Nbr_of_Columns;
						WHEN	("&amp;amp;&amp;amp;Column&amp;amp;i")
							&amp;amp;&amp;amp;Column&amp;amp;i		=	1;
				%END;
			%MEND;
			%Set_Column_Values
			OTHERWISE;
		END;
	END;
RUN;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The program is dynamic.&amp;nbsp; The program will automatically adjust to however many unique values are in your data.&amp;nbsp; The program will then dynamically determine which columns are populated and which are not setting a boolean (0 or 1).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Results from the above data:&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="jimbarbour_0-1600896927154.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/49726i98CEA31702CFF366/image-size/medium?v=v2&amp;amp;px=400" role="button" title="jimbarbour_0-1600896927154.png" alt="jimbarbour_0-1600896927154.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Jim&lt;/P&gt;</description>
      <pubDate>Wed, 23 Sep 2020 21:40:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686222#M208195</guid>
      <dc:creator>jimbarbour</dc:creator>
      <dc:date>2020-09-23T21:40:53Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686225#M208196</link>
      <description>&lt;P&gt;If we add made-up conditions BMI and GERI, as in the following:&lt;/P&gt;
&lt;PRE&gt;1           COPD              HTN              DM         NONE
2           HTN               COPD             DM         MENTAL_ILLNESS
3           DM
4           MENTAL_ILLNESS    NONE
5           DM                MENTAL_ILLNESS
6           BMI               GERI&lt;/PRE&gt;
&lt;P&gt;The program adjusts automatically, and the results are as follows:&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="jimbarbour_0-1600897434056.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/49727i213B3CC5DE2F8548/image-size/medium?v=v2&amp;amp;px=400" role="button" title="jimbarbour_0-1600897434056.png" alt="jimbarbour_0-1600897434056.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Jim&lt;/P&gt;</description>
      <pubDate>Wed, 23 Sep 2020 21:44:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686225#M208196</guid>
      <dc:creator>jimbarbour</dc:creator>
      <dc:date>2020-09-23T21:44:33Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686226#M208197</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;*what you start with;
DATA	Have	
		
		;
	
	LENGTH	StudyID			$8
			Comorbidity1	$32
			Comorbidity2	$32
			Comorbidity3	$32
			Comorbidity4	$32
			;
	INFILE	Datalines		MISSOVER;
	INPUT	StudyID			$
			Comorbidity1	$
			Comorbidity2	$
			Comorbidity3	$
			Comorbidity4	$
			;
DATALINES;
1           COPD              HTN              DM         NONE
2           HTN               COPD             DM         MENTAL_ILLNESS
3           DM
4           MENTAL_ILLNESS    NONE
5           DM                MENTAL_ILLNESS
;
RUN;


*transpose to long;
proc transpose data=have out=long;
by studyID;
var comorbidity1-Comorbidity4;
run;

*add indicator variable and delete empty rows;
data long2;
set long;
count=1;
if missing(col1) then delete;
run;

*transpose to wide again;
proc transpose data=long2 out=wide prefix=DX_;
by studyID;
id col1;
var count;
run;

*fill in missing with 0;
data want;
set wide;
array _dx(*) dx_:;
do i=1 to dim(_dx);
if missing(_dx(i)) then _dx(i) = 0;
end;

run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 23 Sep 2020 21:50:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686226#M208197</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2020-09-23T21:50:24Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686431#M208297</link>
      <description>this works until step 3, making data wide again it says, " The ID value "DX_Use_Choices_1000___9999" occurs twice in the same BY group".....&lt;BR /&gt;</description>
      <pubDate>Thu, 24 Sep 2020 16:50:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686431#M208297</guid>
      <dc:creator>stancemcgraw</dc:creator>
      <dc:date>2020-09-24T16:50:44Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686449#M208306</link>
      <description>That means you have duplicate either in the diagnosis or in the IDs? &lt;BR /&gt;Figure out what is duplicated and either de-duplicate it before this process or modify your example data to reflect your actual data ad I can take another look at it.</description>
      <pubDate>Thu, 24 Sep 2020 17:49:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686449#M208306</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2020-09-24T17:49:04Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686457#M208313</link>
      <description>I used the LET option on the transpose statement and that seemed to work. Now I just need to remove the DX_ suffix from each variable. Not sure how to do that...</description>
      <pubDate>Thu, 24 Sep 2020 18:09:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686457#M208313</guid>
      <dc:creator>stancemcgraw</dc:creator>
      <dc:date>2020-09-24T18:09:25Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686486#M208328</link>
      <description>prefix = option on PROC TRANSPOSE added those - I do that to make it easier to reference all the variables at once within an array. You can add IDLABEL so that your labels are clean and use those instead when printing/displaying the report.</description>
      <pubDate>Thu, 24 Sep 2020 19:21:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686486#M208328</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2020-09-24T19:21:43Z</dc:date>
    </item>
    <item>
      <title>Re: Create new dummy variables from multiple columns of character variables</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686569#M208368</link>
      <description>&lt;P&gt;If you really needed the column names without the Dx_ prefix, a little macro that would strip them off can be written like so:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%MACRO	Remove_Prefix_From_Var(Lib=, Set=, Pfx=, Debug=NO);
	%LOCAL	Cmnt;
	%LOCAL	Vars;

	%IF	%QUPCASE(&amp;amp;Debug)	=	YES	%THEN
		%DO;
			%LET	Cmnt	=	;
			%PUT	&amp;amp;Nte1  Debug is on in macro &amp;amp;SysMacroName;
		%END;
	%ELSE
		%DO;
			%LET	Cmnt	=	*;
		%END;

	PROC	CONTENTS	DATA=&amp;amp;Lib..&amp;amp;Set
						OUT	=WORK.&amp;amp;Set.V
						VARNUM
						NOPRINT
						;
	RUN;

	PROC	SQL	NOPRINT;
		SELECT	CATS(Name, '=', TRANWRD(lowcase(SUBSTR(Name, 1, LENGTH("&amp;amp;Pfx"))), lowcase("&amp;amp;Pfx"), ''), SUBSTR(Name, LENGTH("&amp;amp;Pfx") + 1))
			AS	Work_Var	LENGTH=512
			INTO	:	Vars	SEPARATED BY ' '
			FROM	WORK.&amp;amp;Set.V
			;
	QUIT;

	%&amp;amp;Cmnt.PUT	NOTE:  &amp;amp;=Vars;

	PROC	DATASETS	LIBRARY=&amp;amp;Lib	NOLIST;
		MODIFY	&amp;amp;Set;
			RENAME	
				&amp;amp;Vars
				;
	QUIT;
%MEND	Remove_Prefix_From_Var;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I used SAShelp.gas to generate test data like so:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data	work.gas;
	set	sashelp.gas(Rename=(cpratio=dx_CpRatio
							EqRatio=dx_EqRatio
							Fuel=Dx_Fuel
							NOx=Dx_NOx))
							;
run;

**-----------------------------------------------------------------------------**;

%Remove_Prefix_From_Var(Lib=WORK, Set=Gas, Pfx=Dx_, Debug=YES);&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;But of course you have actual data.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If it's useful, great.&amp;nbsp; Otherwise, carry on.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Jim&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 24 Sep 2020 23:01:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-new-dummy-variables-from-multiple-columns-of-character/m-p/686569#M208368</guid>
      <dc:creator>jimbarbour</dc:creator>
      <dc:date>2020-09-24T23:01:54Z</dc:date>
    </item>
  </channel>
</rss>

