Hi, Your task can't be finished in one step (not by me at least). Following code has not been intensively tested: /*purge records with missing data*/ data have; set h.connected_male; if cmiss(ticker,dirgender)=0; if nmiss(iddir,year)=0; run; proc sql; /*Mark the ticker-year group with female on board*/ create table want1 as select distinct *, SUM(upcase(dirgender)='FEMALE')>0 AS _FEMALE FROM have GROUP BY TICKER,YEAR ; /*mark the iddir who holds multiple positions in different companies at the same period*/ create table want2 as select distinct *, count(distinct ticker)>1 as _mult from want1 group by iddir, year; /*count the positions per iddir-year */ create table want3 as select distinct a.*,b._mult from want1 a, want2 b where a.ticker=b.ticker and a.year=b.year and a.iddir=b.iddir ; create table want4 as select distinct *,sum(_female=1 and _mult=1) as _ct from want3 group by iddir, year; quit; /*Mark the iddir of interest*/ data want5; set want4; if ((_mult=0 and _ct>0) or (_mult=1 and _ct>1)) and upcase(dirgender)='MALE' then flag=1; else flag=0; run; /*Calculated the percentage and sorted by descending percentage*/ proc sql; create table want6 as select ticker,year, sum(flag)/sum(upcase(dirgender)='MALE') as percent format=percent7.2 from want5 group by ticker, year order by percent desc, ticker,year; quit; Regards, Haikuo
... View more