Hello fellow SAS users,
I am having difficult writing a program that will allow me to repeat the below code by different characteristics.
Descriton of data:
1. Time-series financial data for corporations
2. Three categorical variables: Country, Year, Industry
I need to calculate market share data per country/year/industry. The below code executes that exactly as needed, but only by industry. I am looking for assistance in running this code by Year+Industry (for overall by year), and then by Country+Year+Industry.
I admit that I wrote this code a couple years ago and have not run it again since. At that time, I just copy/pasted it (but that is not very efficient, since there are over 13,000 combinations). Any help would be appreciated.
ODS HTML CLOSE; /*CLOSE PREVIOUS*/
ODS HTML; /*OPEN NEW*/
proc sort data=spec; by dscd fyear; run;
proc sort data=primary; by dscd; run;
data spec2; merge spec primary; by dscd; if primary=0 then delete;
run;
DATA SPEC3; SET SPEC2;
IF AUDPARENT='NULL' OR AUDPARENT='PVNT' OR AUDPARENT='PVND' THEN AUDITOR = AUDCODE; ELSE AUDITOR=AUDPARENT;
drop primary audname audparent;
RUN;
proc sort data=spec3; by country fyear indcode; run;
*SETUP *;
DATA GLOBAL_TABLES; SET SPEC3;
INDCODE_LAG = LAG(INDCODE);
INDCODE_LAG2 = LAG2(INDCODE);
INDCODE_LAG3 = LAG3(INDCODE);
INDCODE_LAG4 = LAG4(INDCODE);
INDCODE_LAG5 = LAG5(INDCODE);
INDCODE_LAG6 = LAG6(INDCODE);
IF INDCODE_LAG = . THEN INDCODE_LAG=0;
IF INDCODE_LAG2 = . THEN INDCODE_LAG2=0;
IF INDCODE_LAG3 = . THEN INDCODE_LAG3=0;
IF INDCODE_LAG4 = . THEN INDCODE_LAG4=0;
IF INDCODE_LAG5 = . THEN INDCODE_LAG5=0;
IF INDCODE-INDCODE_LAG=1 THEN INDRANK=1;
IF INDCODE-INDCODE_LAG=0 THEN INDRANK=2;
IF INDCODE-INDCODE_LAG2=0 THEN INDRANK=3;
IF INDCODE-INDCODE_LAG3=0 THEN INDRANK=4;
IF INDCODE-INDCODE_LAG4=0 THEN INDRANK=5;
IF INDCODE-INDCODE_LAG5=0 THEN INDRANK=6;
AUD1=LAG4(AUDITOR);
AUD2=LAG3(AUDITOR);
AUD3=LAG2(AUDITOR);
AUD4=LAG(AUDITOR);
AUD5=AUDITOR;
RUN;
PROC SORT DATA=GLOBAL_TABLES; BY FYEAR INDCODE; RUN;
*BEGIN INDUSTRY TOTALS*;
DATA GLOBAL_IND_TOTALS; SET GLOBAL_TABLES;
BY INDCODE;
IF INDCODE>0 THEN COUNT=1; ELSE COUNT=0;
IF First.INDCODE=1 THEN TOTAL_ASSETS_USD=0;
TOTAL_ASSETS_USD+ASSETS_USD;
IF First.INDCODE=1 THEN TOTAL_REV=0;
TOTAL_REV+REV;
IF First.INDCODE=1 THEN TOTAL_AUDFEE=0;
TOTAL_AUDFEE+AUDFEE;
IF First.INDCODE=1 THEN IND_COUNT=0;
IND_COUNT+COUNT;
IF Last.INDCODE=0 THEN DELETE;
DROP FYE_DATE INDCODE_LAG INDCODE_LAG2 INDCODE_LAG3 INDCODE_LAG4 INDCODE_LAG5 INDCODE_LAG6 INDRANK AUD1 AUD2 AUD3 AUD4 AUD5 ;
RUN;
*BEGIN HERFINDAHL INDEX*;
*OBTAIN TOTAL ASSETS_USD PER AUDITOR PER INDUSTRY*;
PROC SORT DATA= GLOBAL_TABLES; BY INDCODE AUDITOR; RUN;
DATA GLOBAL_AUD_TOTALS; SET GLOBAL_TABLES;
BY INDCODE AUDITOR;
IF INDCODE>0 THEN COUNT=1;
IF First.AUDITOR=1 THEN AUD_ASSETS_USD=0;
AUD_ASSETS_USD+ASSETS_USD;
IF First.AUDITOR=1 THEN AUD_REV=0;
AUD_REV+REV;
IF First.AUDITOR=1 THEN AUD_COUNT=0;
AUD_COUNT+COUNT;
IF Last.AUDITOR=0 THEN DELETE;
DROP NAME CIK SIC SIC_DESC AUDKEY AUDFEE RELFEE NONFEE TOTFEE RESTATE FYE_DATE FYE FYE_MONTH SIGDATE REV NI ASSETS_USD INDUS2 INDUS3 FF_IND INDCODE_LAG INDCODE_LAG2 INDCODE_LAG3 INDCODE_LAG4 INDCODE_LAG5 INDCODE_LAG6 INDRANK AUD1 AUD2 AUD3 AUD4 AUD5 TOP2 TOP1_3 TOP2_3 TOP3 TOP5 TOP1_ANY2;
RUN;
*MERGE AUDIT TOTALS WITH INDUSTRY TOTALS;
DATA GLOBAL_HERF;
MERGE GLOBAL_IND_TOTALS GLOBAL_AUD_TOTALS;
BY INDCODE;
DROP TOTAL_REV TOTAL_AUDFEE TOTAL_TOTFEE;
MKTSHARE_ASSETS_USD=AUD_ASSETS_USD/TOTAL_ASSETS_USD;
MKTSHARE_SALES=AUD_REV/TOTAL_REV;
MKTSHARE_COUNT=AUD_COUNT/IND_COUNT;
DROP AUDCODE AUDNAME AUDPARENT BIG4 ;
RUN;
*AT THIS POINT, THE ABOVE FILE HAS EACH AUDITOR'S MARKET SHARE IN EACH INDUSTRY*;
*CALCULATE INDUSTRY SPECIALISTS*;
*ASSETS_USD AUDITED*;
PROC SORT DATA= GLOBAL_HERF; BY INDCODE DESCENDING MKTSHARE_ASSETS_USD ; RUN;
DATA GLOBAL_ASSETS_USD_SPEC; SET GLOBAL_HERF; BY INDCODE;
IF First.INDCODE=1 THEN FIRST=1; ELSE FIRST=0;
SECOND = LAG(FIRST);
THIRD = LAG2(FIRST);
FOURTH = LAG3 (FIRST);
IF FIRST=0 AND SECOND=0 AND THIRD=0 AND FOURTH = 0 THEN DELETE;
AUDITOR_L = LAG(AUDITOR);
*IF FIRST=0 THEN TOP1_ASSETS_USD_MKTSHARE = AUDITOR_L; *ELSE TOP1_ASSETS_USD_MKTSHARE = AUDITOR;
*IF FIRST=0 THEN TOP2_ASSETS_USD_MKTSHARE = AUDITOR; *ELSE TOP2_ASSETS_USD_MKTSHARE = .;
*IF FIRST=1 AND MKTSHARE_COUNT NE 1 THEN DELETE;
DROP assets total_totfee total_assets_usd total_rev total_audfee COUNT TOTAL_ASSETS_USD IND_COUNT DSCD AUD_ASSETS_USD AUD_REV AUD_COUNT AUDITOR_L COUNT TOTAL_ASSETS_USD IND_COUNT AUD_ASSETS_USD AUD_REV AUD_COUNT ;
RUN;
PROC PRINT DATA=GLOBAL_ASSETS_USD_SPEC; RUN;
*AT THIS POINT, THE ABOVE FILE CONTAINS THE TOP4 AUDITORS AND THEIR CORRESPONDING MARKET SHARES*
You may try convert your program into a macro program:
%macro report_by(by=);
... copy here your program with some addaptation...
... replace your BY INCODE into by &by ...
%mend report_by;
%report_by(by=incode);
%report_by(by=fyear incode);
%report_by(by=fyear);
... any other combination...
In some cases you may need add or addapt SORT BY too;
In other cases you may want addapt code to the BY variables usinf %IF ... %THEN ...; %else ... ;
Join us for SAS Innovate 2025, our biggest and most exciting global event of the year, in Orlando, FL, from May 6-9. Sign up by March 14 for just $795.
Learn the difference between classical and Bayesian statistical approaches and see a few PROC examples to perform Bayesian analysis in this video.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.