Hi mspak, Please find included a revised solution. This new revised solution will perform Market/Sales Share calculations on two levels 1. Ratio/Percent at the Industry Segment for a given fyear and radius 2. Ratio/Percent at the different distances amongst the zips within the given Industry Segment/Fyear/Radius combination (Drill Down on Radius to see how the Zips/companys are connected to each other across the different distance bins/clusters) %macro shrinkCombos( p_inDsName= /* Input Source Data Source */ , p_outDsName= /* Output Data Set */ , p_byClause= /* Classification Variables Set */ , p_classVar= /* Class Variable to Transpose */ , p_classVar2= /* Second Class Variable to Transpose (Optional) */ , p_prntClassVar= /* Preceding Class Variable */ , p_aggrVarLen=10 /* Variable length of the new transposed values */ ); /* Sort */ PROC SORT DATA=&p_inDsName; BY &p_byClause; RUN; /* Process the data and generate the transposed value */ DATA &p_outDsName(DROP=&p_classVar RENAME=(aggr=&p_classVar)); SET &p_inDsName; BY &p_byClause; LENGTH aggr $&p_aggrVarLen ; RETAIN aggr ; IF (FIRST.&p_prntClassVar) THEN aggr=''; %if (%superq(p_classVar2) NE ) %then %do; if (INDEX(STRIP(aggr), STRIP(&p_classVar2)) = 0) then aggr = catx(' ',aggr,&p_classVar2); if (INDEX(STRIP(aggr), STRIP(&p_classVar)) = 0) then aggr = catx(' ',aggr,&p_classVar); DROP &p_classVar2; %end; %else %do; aggr = catx(' ',aggr,&p_classVar); %end; IF (LAST.&p_prntClassVar) THEN OUTPUT; RUN; %mend shrinkCombos; %macro getMarketShareByDistance(p_inDsName= ,p_year=, p_SIC3=, p_radius=60, p_outDsName=); %local l_count l_zips l_maxFreq l_maxFreq2; /* ------------------------------------------------------- */ /* Get unique list of Firms Zip Codes for the */ /* specified Year/industry classification code combination */ /* ------------------------------------------------------- */ proc sort data=&p_inDsName out=work.zips(keep=zip) nodupkey; BY zip; where (zipcity(zip) is not null %if(%superq(p_year) NE ) %then %do; AND fyear EQ &p_year %end; %if(%superq(p_SIC3) NE ) %then %do; AND SIC3 EQ "&p_SIC3" %end; ); run; /* ------------------------------------------------ */ /* Workout the available Zip City Distance clusters */ /* ------------------------------------------------ */ Proc sql noprint; Select quote(put(zip,z5.)) into :l_zips separated by ' ' from work.zips ; quit; %let l_count=&sqlobs; /* %put l_count=&l_count; %put l_zips = &l_zips; */ data work.zips(keep=pointA pointB distance); Set work.zips; LENGTH pointA pointB $5; array zips [&l_count] $5 (&l_zips); pointA = put(zip,z5.); do i=1 to dim(zips); pointB= zips; distance = zipcitydistance(pointA, pointB); /* Ignore zero distances */ if &p_radius >= distance > 0 then output; end; run; /* Get record count for calculating variable length */ proc sql noprint; select max(_freq_) into :l_maxFreq from ( select distance ,pointA ,count(pointB) as _freq_ from work.zips Group by distance ,pointA) ; quit; /*%put l_maxFreq=&l_maxFreq;*/ /* Work out Zips Connection Points */ %shrinkCombos( p_inDsName=work.zips, p_outDsName=work.zips_connctions , p_byClause=%str(distance pointA pointB), p_classVar=pointB, p_prntClassVar=pointA, p_classVar2=, p_aggrVarLen=%eval(&l_maxFreq*6)); /* Get record count for calculating variable length */ proc sql noprint; select max(_freq_) into :l_maxFreq2 from ( select distance ,count(pointA) as _freq_ from work.zips_connctions Group by distance) ; quit; /*%put l_maxFreq2=&l_maxFreq2;*/ /* Work out the Distance Clusters and their contributing members */ %shrinkCombos( p_inDsName=work.zips_connctions, p_outDsName=work.dist_clusters , p_byClause=%str(distance pointA), p_classVar=pointA, p_prntClassVar=distance, p_classVar2=pointB, p_aggrVarLen=%eval((&l_maxFreq*6) + 1 + (&l_maxFreq2*6))); proc sql; create table work.combined AS select a.* , b.distance , b.pointA AS cluster from &p_inDsName a, work.dist_clusters b where INDEX(STRIP(b.pointA), put(a.zip,z5.)) GT 0 %if(%superq(p_year) NE ) %then %do; AND fyear EQ &p_year %end; %if(%superq(p_SIC3) NE ) %then %do; AND SIC3 EQ "&p_SIC3" %end; ORDER BY b.distance, a.zip; quit; data &p_outDsName; total=0; do until (last.distance); SET work.combined; BY distance zip; total+sale; end; do until (last.distance); SET work.combined; BY distance zip; /* Avoid Division by Zero*/ IF (total > 0) then ratio = sale/total; else ratio = 0; output; end; format ratio percent8.3; run; /* Clean up */ proc datasets lib=work nolist; delete zips zips_connctions dist_clusters combined; run; quit; %mend getMarketShareByDistance; %macro getMarketShareByRadius( p_inDsName= , p_year= , p_sic3= , p_radius= , p_outRadiusSharesDsName= , p_outDistanceSharesDsName= ); %local l_sTime l_eTime l_rTime; /** Record Starting Time **/ %let l_sTime = %sysfunc(time()); /* --------------------------------- */ /* Get a list of unique combinations */ /* of Fyear/Industry Section */ /* --------------------------------- */ proc summary data=&p_inDsName nway; class fyear sic3; where (zipcity(zip) is not null %if(%superq(p_year) NE ) %then %do; AND fyear EQ &p_year %end; %if(%superq(p_SIC3) NE ) %then %do; AND SIC3 EQ "&p_SIC3" %end; ); output out=work.combos(drop=_:); run; FILENAME codes TEMP; DATA _NULL_; FILE codes; SET work.combos end=eof; put '%getMarketShareByDistance(p_inDsName='"&p_inDsName ,p_year=" fyear +(-1)", p_SIC3=" sic3 +(-1)", p_radius=&p_radius, p_outDsName=work._"_n_ +(-1)");"; put "PROC APPEND BASE=&p_outDistanceSharesDsName DATA=WORK._" _n_ +(-1)" FORCE; RUN;"; PUT "PROC DATASETS LIB=WORK NOLIST; DELETE _:; RUN; QUIT;"; if (eof) then do; put ; put "proc sql;"; put +3 "create table &p_outRadiusSharesDsName AS"; put +3 "select distinct fyear, sic3, zip, tic, sale"; put +3 "from &p_outDistanceSharesDsName"; put +3 "group by fyear, sic3, zip, tic"; put +3 "order by fyear, sic3, zip, tic;"; put "quit;"; put ; put "data &p_outRadiusSharesDsName;"; put +3 "total=0;"; put +3 "radius=&p_radius;"; put +3 "do until (last.sic3);"; put +6 "SET &p_outRadiusSharesDsName;"; put +6 "BY fyear sic3;"; put +6 "total+sale;"; put +3 "end;"; put ; put +3 "do until (last.sic3);"; put +6 "SET &p_outRadiusSharesDsName;"; put +6 "BY fyear sic3;"; put +6 "IF (total > 0) then ratio = sale/total;"; put +6 "else ratio = 0;"; put +6 "output;"; put +3 "end;"; put +3 "format ratio percent8.2;"; put "run;"; end; RUN; %include codes; /** Record Finish Time **/ %let l_eTime = %sysfunc(time()); %if (%superq(l_sTime) NE ) %then %do; /* Calculate Run Time and display it */ %let l_rTime = %sysfunc(putn(%sysevalf(&l_eTime - &l_sTime),time12.2)); %put >>> getMarketShareByRadius :>>> Total RunTime = &l_rTime; %end; %mend getMarketShareByRadius; /* ----------------------- */ /* Usage Example */ /* ----------------------- */ libname comm "C:\projects\SAS_Community"; %getMarketShareByRadius( p_inDsName=comm.geo_shr /* Input Data */ , p_year= /* Fyear value */ , p_sic3= /* Industry Segment */ , p_radius=60 /* Within 60 miles radius */ , p_outRadiusSharesDsName=work.marketShares /* Output Market Shares within 60 miles Radius Data set */ , p_outDistanceSharesDsName=work.marketSharesByDistance /* Output Market Shares for the various distance bins within 60 miles radius Data Set */ ); Just another way of analyzing the data. Regards, Ahmed
... View more