BookmarkSubscribeRSS Feed
Reeza
Super User

This gets you really close. You'll need to play around with the rounding to get an exact match. 

 

data final;
    input Eink :$2. Bg He;
    cards;
01 55.487739 2.722371
02 127.485645 18.670678
03 182.95309 53.529988
04 354.69612 122.703093
05 450.777081 137.596942
06 461.617933 157.457164
07 429.38197 192.283645
08 370.644235 173.688658
09 281.256479 180.848677
10 170.426973 137.045263
11 118.22022 128.45571
12 93.656461 104.596442
13 68.561622 112.710123
14 42.443895 77.742469
15 50.933162 136.718895
16 35.285987 104.219131
17 23.123863 78.424041
18 16.724659 89.48717
19 9.271308 46.396113
20 4.870804 21.574738
21 4.83902 19.753305
22 3.487616 9.23105
23 0.860551 2.136266
24 1.202043 1.861731
;

data max_min;
    length Eink $2 Xmin Xmax 8;

    do xmax=250 to 3500 by 250, 4000, 4500, 5000, 6000, 7000, 8000, 10000, 15000, 
            25000, .;
        i+1;
        eink=put(i, z2.);
        xmin=sum(0, lag(xmax));
        output;
    end;
    drop i;
run;

data eink_2;
    merge final max_min;
    by eink;
run;

proc sort data=eink_2;
    by eink xmax xmin;
run;

proc sql noprint;
    select name into :var_list separated by " " from sashelp.vcolumn where 
        libname='WORK' and memname='EINK_2' and upcase(name) not in ('EINK', 'XMAX', 
        'XMIN');
quit;

%put &var_list.;

proc transpose data=eink_2 out=tall(rename=(col1=VALUE)) name=VARNAME;
    by Eink xmax xmin;
    var &var_list;
run;

proc sort data=tall;
    by varname eink;
run;

*calculate cumulative sum;

data eink_3;
    set tall;
    by varname;

    if first.varname then
        cum_sum=value;
    else
        cum_sum+value;
run;

*get totals and substract from cumulative numbers;

proc sql;
    create table eink_4 as select varname, value, xmin, xmax, cum_sum, 
        sum(value)/2 as sum_var, calculated sum_var - cum_sum as fx_pra from eink_3 
        group by varname order by varname, eink;
quit;

*rank data ( really just reverses eink order for everything greater than 0?);

proc rank data=eink_4 out=ranked_data;
    where fx_pra>0;
    by varname;
    var fx_pra;
    ranks rang;
run;

*get first ranks only;

data first_ranks;
    set ranked_data;
    where rang=1;
    cum_sum=round(cum_sum, 0.0000001);
    keep varname value sum_var cum_sum;
run;

*repeat rank process for >0;
*rank data ( really just reverses eink order for everything greater than 0?);

proc rank data=eink_4 descending out=ranked_data_reversed;
    where fx_pra<0;
    by varname;
    var fx_pra;
    ranks rang;
run;

*get first ranks only;

data first_ranks_reversed;
    set ranked_data_reversed;
    where rang=1;
    B=xmax-xmin;
    value=round(value, 0.0000001);
    keep varname xmin B Value;
    rename xmin=reversed_xmin value=reversed_value;
run;

data results;
    merge first_ranks first_ranks_reversed;
    by varname;
    MEDIAN=reversed_xmin + B*((sum_var-cum_sum)/reversed_value);
    format MEDIAN 15.12;
run;

proc transpose data=results out=want NAME=EINK;
    id varname;
    var MEDIAN;
run;
Tom
Super User Tom
Super User

Thanks, @Reeza .  So you just need to find the point where the cumulative sum crosses the 50% threshold.

 

@Konkordanz I don't think you need PROC RANK and that other stuff.  And if you have explained what calculation you wanted:

* The MEDIAN is generated by interpolating between XMIN and XMAX ;
* Amount is based on how much of current value is before 50 percent ;

at the start we could have found a simple solution much faster.

 

Here is a macro that just needs the input dataset and the variable list and does it all in one data step. So you can run it for as many datasets and variables as you have.

 

(Note: The XMIN/XMAX values need to already be in the input dataset).

%macro adj_median
(indsn    /* Input dataset */
,varlist  /* space delimited variable list */
,outdsn=medians /* Output dataset */
);
%local n ;
%* Count how many variables there are ;
%let n=%sysfunc(countw(&varlist,%str( ),q));
data &outdsn ;
* Setup arrays ;
  array vars &varlist ;
  array total total1-total&n ;
  array subtot subtot1-subtot&n ;
  array median median1-median&n ;
* Calculate Totals ;
  do until(eof1);
    set &indsn end=eof1;
    do over vars;
      total+vars;
    end;
  end;
* Find 50 percent of totals ;
  do over total;
    total=total/2;
  end;
* Calculate cummulative total until cross half of total ;
* Continue until either hit the end of the data or have ;
* found median for every variable;
  do until(eof2 or n>=&n);
    set &indsn end=eof2;
    do over vars;
      subtot+vars;
      if subtot > total and missing(median) then do;
* The MEDIAN is generated by interpolating between XMIN and XMAX ;
* Amount is based on how much of current value is before 50 percent ;
        median = xmin + (xmax-xmin)*((total+vars-subtot)/vars) ;
        n+1;
      end;
    end;
  end;
* Store the medians back into the original variables;
  do over vars;
    vars = median;
  end;
* Only keep the original variables ;
  keep &varlist;
run;
%mend adj_median;

PS I don't like typing [i] over and over and over and over so I just made it use old fashion implicit array references.

So let's try if for our FINAL and MIN_MAX datasets.

data for_analysis;
  merge final min_max;
  by eink;
run;

options mprint;
%adj_median(indsn=for_analysis,varlist=Bg He,outdsn=WANT);

proc print data=want;
run;


Results

Obs       Bg         He

 1     1526.83    2278.14

Log

Spoiler
3648  options mprint;
3649  %adj_median(indsn=for_analysis,varlist=Bg He,outdsn=WANT);
MPRINT(ADJ_MEDIAN):   data WANT ;
MPRINT(ADJ_MEDIAN):   * Setup arrays ;
MPRINT(ADJ_MEDIAN):   array vars Bg He ;
MPRINT(ADJ_MEDIAN):   array total total1-total2 ;
MPRINT(ADJ_MEDIAN):   array subtot subtot1-subtot2 ;
MPRINT(ADJ_MEDIAN):   array median median1-median2 ;
MPRINT(ADJ_MEDIAN):   * Calculate Totals ;
MPRINT(ADJ_MEDIAN):   do until(eof1);
MPRINT(ADJ_MEDIAN):   set for_analysis end=eof1;
MPRINT(ADJ_MEDIAN):   do over vars;
MPRINT(ADJ_MEDIAN):   total+vars;
MPRINT(ADJ_MEDIAN):   end;
MPRINT(ADJ_MEDIAN):   end;
MPRINT(ADJ_MEDIAN):   * Find 50 percent of totals ;
MPRINT(ADJ_MEDIAN):   do over total;
MPRINT(ADJ_MEDIAN):   total=total/2;
MPRINT(ADJ_MEDIAN):   end;
MPRINT(ADJ_MEDIAN):   * Calculate cummulative total until cross half of total ;
MPRINT(ADJ_MEDIAN):   * Continue until either hit the end of the data or have ;
MPRINT(ADJ_MEDIAN):   * found median for every variable;
MPRINT(ADJ_MEDIAN):   do until(eof2 or n>=2);
MPRINT(ADJ_MEDIAN):   set for_analysis end=eof2;
MPRINT(ADJ_MEDIAN):   do over vars;
MPRINT(ADJ_MEDIAN):   subtot+vars;
MPRINT(ADJ_MEDIAN):   if subtot > total and missing(median) then do;
MPRINT(ADJ_MEDIAN):   * The MEDIAN is generated by interpolating between XMIN and XMAX ;
MPRINT(ADJ_MEDIAN):   * Amount is based on how much of current value is before 50 percent ;
MPRINT(ADJ_MEDIAN):   median = xmin + (xmax-xmin)*((total+vars-subtot)/vars) ;
MPRINT(ADJ_MEDIAN):   n+1;
MPRINT(ADJ_MEDIAN):   end;
MPRINT(ADJ_MEDIAN):   end;
MPRINT(ADJ_MEDIAN):   end;
MPRINT(ADJ_MEDIAN):   * Store the medians back into the original variables;
MPRINT(ADJ_MEDIAN):   do over vars;
MPRINT(ADJ_MEDIAN):   vars = median;
MPRINT(ADJ_MEDIAN):   end;
MPRINT(ADJ_MEDIAN):   * Only keep the original variables ;
MPRINT(ADJ_MEDIAN):   keep Bg He;
MPRINT(ADJ_MEDIAN):   run;

NOTE: There were 24 observations read from the data set WORK.FOR_ANALYSIS.
NOTE: There were 11 observations read from the data set WORK.FOR_ANALYSIS.
NOTE: The data set WORK.WANT has 1 observations and 2 variables.
NOTE: DATA statement used (Total process time):
      real time           0.00 seconds
      cpu time            0.00 seconds

 

Konkordanz
Pyrite | Level 9

Oh, thank you for your effort and your responses! I will look at them in the coming days and get back to you. This will take a while because I need to get into your detailed answers and the many syntax steps that are new to me. Thanks!

SAS Innovate 2025: Call for Content

Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!

Submit your idea!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 17 replies
  • 2093 views
  • 2 likes
  • 6 in conversation