<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Organizing my code in SAS Studio</title>
    <link>https://communities.sas.com/t5/SAS-Studio/Organizing-my-code/m-p/445996#M4981</link>
    <description>&lt;P&gt;Good afternoon!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The code for my term paper is a bit of a mess right now. I am hoping someone on this page could help me organize it/identify extraneous code.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;One of my main issues is that I want to simply merge two datasets, but I have ended up somehow with:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;PAPER.COMPUSTAT_EXECUCOMP&lt;/P&gt;&lt;P&gt;WORK.CEO&lt;/P&gt;&lt;P&gt;PAPER.CSRP_MONTHLY_STOCK&lt;/P&gt;&lt;P&gt;WORK.CEO_SORTED&lt;/P&gt;&lt;P&gt;WORK.FIRMS_SORTED&lt;/P&gt;&lt;P&gt;WORK.CEO_FIRM&lt;/P&gt;&lt;P&gt;WORK.CEOS_SORTED_NEW&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;That is when I run all of my code. Here it is:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;libname paper "~/425/425_Final_Paper";

*import compustat dataset;
PROC IMPORT out=paper.CompuStat_Execucomp
	datafile="~/425/425_Final_Paper/CompuStat_Execucomp.csv"
	DBMS=CSV replace;
	getnames= yes;
	guessingrows=2000;
run;

*keep only CEOs;
data CEO;
	set paper.Compustat_ExecuComp;
 	if CEOANN = 'CEO' then output CEO;
run;

*import csrp dataset;
PROC IMPORT out=paper.CSRP_Monthly_Stock
	datafile="~/425/425_Final_Paper/CSRP_MonthlyStock.csv"
	DBMS=CSV replace;
	getnames= yes;
/*guessingrows=max;*/
run;

*delete rows with missing cusips;
data paper.CSRP_Monthly_Stock;
	set paper.CSRP_Monthly_Stock;
	if nmiss(cusip) &amp;gt; 0 then delete;
run;

*create new variable year;
data paper.CSRP_Monthly_Stock;
	set paper.CSRP_Monthly_Stock;
	year = int( date/10000 );
run;

*delete rows with missing return data;
data paper.CSRP_Monthly_Stock;
	set paper.CSRP_Monthly_Stock;
	if nmiss(ret) &amp;gt; 0 then delete;
run;

*sort by ticker;
proc sort 
	data=paper.CSRP_Monthly_Stock;
	by ticker;
run;

data paper.CSRP_Monthly_Stock;
	set paper.CSRP_Monthly_Stock;
	by ticker date;
	retain annual_return 1;
	annual_return = annual_return * (1 + RET);
	month = int( mod(date,10000) / 100);
	if month = 12 or last.ticker;
	annual_return = (annual_return - 1) * 100;
	output;
	annual_return = 1;
run;

/*practice merging*/
libname paper "~/425/425_Final_Paper";

/*keep only CEOs*/
data CEO;
	set paper.Compustat_ExecuComp;
 	if CEOANN = 'CEO' then output CEO;
run;

* 1. Sort CEO data by cusip and year &amp;amp; save sorted file as ceos_sorted ; 
PROC SORT DATA=paper.Compustat_ExecuComp OUT=ceos_sorted; 
  BY year; 
RUN; 

libname paper "~/425/425_Final_Paper";

* 2. Sort firm data by cusip and year &amp;amp; save sorted file as firms_sorted ; 
PROC SORT DATA=paper.CSRP_Monthly_Stock OUT=firms_sorted; 
  BY year; 
RUN; 

libname paper "~/425/425_Final_Paper";

/*keep only CEOs*/
data CEO;
	set paper.Compustat_ExecuComp;
 	if CEOANN = 'CEO' then output CEO;
run;

* 3. Merge CEO data and firm data by year in a data step; 
DATA ceo_firm ; 
  MERGE ceos_sorted_new firms_sorted; 
  BY year; 
RUN; 

/*use proc contents to see if there is a type mismatch*/
proc contents 
 	data=paper.CSRP_Monthly_Stock;
run;

proc contents 
 	data=paper.Compustat_ExecuComp;
run;

* change cusip to numeric variable; 
data ceos_sorted_new;
set ceos_sorted (rename=(cusip=temp));
cusip = input(temp, 12.);
drop temp;
run;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Let me know what you think!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks so much.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;-SAStuck&lt;/P&gt;</description>
    <pubDate>Thu, 15 Mar 2018 20:45:49 GMT</pubDate>
    <dc:creator>sastuck</dc:creator>
    <dc:date>2018-03-15T20:45:49Z</dc:date>
    <item>
      <title>Organizing my code</title>
      <link>https://communities.sas.com/t5/SAS-Studio/Organizing-my-code/m-p/445996#M4981</link>
      <description>&lt;P&gt;Good afternoon!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The code for my term paper is a bit of a mess right now. I am hoping someone on this page could help me organize it/identify extraneous code.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;One of my main issues is that I want to simply merge two datasets, but I have ended up somehow with:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;PAPER.COMPUSTAT_EXECUCOMP&lt;/P&gt;&lt;P&gt;WORK.CEO&lt;/P&gt;&lt;P&gt;PAPER.CSRP_MONTHLY_STOCK&lt;/P&gt;&lt;P&gt;WORK.CEO_SORTED&lt;/P&gt;&lt;P&gt;WORK.FIRMS_SORTED&lt;/P&gt;&lt;P&gt;WORK.CEO_FIRM&lt;/P&gt;&lt;P&gt;WORK.CEOS_SORTED_NEW&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;That is when I run all of my code. Here it is:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;libname paper "~/425/425_Final_Paper";

*import compustat dataset;
PROC IMPORT out=paper.CompuStat_Execucomp
	datafile="~/425/425_Final_Paper/CompuStat_Execucomp.csv"
	DBMS=CSV replace;
	getnames= yes;
	guessingrows=2000;
run;

*keep only CEOs;
data CEO;
	set paper.Compustat_ExecuComp;
 	if CEOANN = 'CEO' then output CEO;
run;

*import csrp dataset;
PROC IMPORT out=paper.CSRP_Monthly_Stock
	datafile="~/425/425_Final_Paper/CSRP_MonthlyStock.csv"
	DBMS=CSV replace;
	getnames= yes;
/*guessingrows=max;*/
run;

*delete rows with missing cusips;
data paper.CSRP_Monthly_Stock;
	set paper.CSRP_Monthly_Stock;
	if nmiss(cusip) &amp;gt; 0 then delete;
run;

*create new variable year;
data paper.CSRP_Monthly_Stock;
	set paper.CSRP_Monthly_Stock;
	year = int( date/10000 );
run;

*delete rows with missing return data;
data paper.CSRP_Monthly_Stock;
	set paper.CSRP_Monthly_Stock;
	if nmiss(ret) &amp;gt; 0 then delete;
run;

*sort by ticker;
proc sort 
	data=paper.CSRP_Monthly_Stock;
	by ticker;
run;

data paper.CSRP_Monthly_Stock;
	set paper.CSRP_Monthly_Stock;
	by ticker date;
	retain annual_return 1;
	annual_return = annual_return * (1 + RET);
	month = int( mod(date,10000) / 100);
	if month = 12 or last.ticker;
	annual_return = (annual_return - 1) * 100;
	output;
	annual_return = 1;
run;

/*practice merging*/
libname paper "~/425/425_Final_Paper";

/*keep only CEOs*/
data CEO;
	set paper.Compustat_ExecuComp;
 	if CEOANN = 'CEO' then output CEO;
run;

* 1. Sort CEO data by cusip and year &amp;amp; save sorted file as ceos_sorted ; 
PROC SORT DATA=paper.Compustat_ExecuComp OUT=ceos_sorted; 
  BY year; 
RUN; 

libname paper "~/425/425_Final_Paper";

* 2. Sort firm data by cusip and year &amp;amp; save sorted file as firms_sorted ; 
PROC SORT DATA=paper.CSRP_Monthly_Stock OUT=firms_sorted; 
  BY year; 
RUN; 

libname paper "~/425/425_Final_Paper";

/*keep only CEOs*/
data CEO;
	set paper.Compustat_ExecuComp;
 	if CEOANN = 'CEO' then output CEO;
run;

* 3. Merge CEO data and firm data by year in a data step; 
DATA ceo_firm ; 
  MERGE ceos_sorted_new firms_sorted; 
  BY year; 
RUN; 

/*use proc contents to see if there is a type mismatch*/
proc contents 
 	data=paper.CSRP_Monthly_Stock;
run;

proc contents 
 	data=paper.Compustat_ExecuComp;
run;

* change cusip to numeric variable; 
data ceos_sorted_new;
set ceos_sorted (rename=(cusip=temp));
cusip = input(temp, 12.);
drop temp;
run;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Let me know what you think!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks so much.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;-SAStuck&lt;/P&gt;</description>
      <pubDate>Thu, 15 Mar 2018 20:45:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Studio/Organizing-my-code/m-p/445996#M4981</guid>
      <dc:creator>sastuck</dc:creator>
      <dc:date>2018-03-15T20:45:49Z</dc:date>
    </item>
    <item>
      <title>Re: Organizing my code</title>
      <link>https://communities.sas.com/t5/SAS-Studio/Organizing-my-code/m-p/446089#M4985</link>
      <description>A simple suggestion is to do more in each data step; for example, the three rounds of CSRP_Monthly_Stock processing at the beginning could be consolidated into one step.</description>
      <pubDate>Fri, 16 Mar 2018 07:16:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Studio/Organizing-my-code/m-p/446089#M4985</guid>
      <dc:creator>LinusH</dc:creator>
      <dc:date>2018-03-16T07:16:41Z</dc:date>
    </item>
  </channel>
</rss>

