One last post from my side: If anyone is interested in the different approaches, attached you will find 4 solutions (two sql, one data set with loop, one with hash). These are all basically copied from the answers above (so not my solution). This post is only ment to provide a basis for otheres who want to go deeper into this topic (e.g. assess performance). Kind regards and thanks again to everyone! data sample;
input Name $ Age Height;
datalines;
Max 10 120
Paul 12 140
Rick 15 130
Morti 8 90
Carl 9 100
Maria 14 150
Claus 13 160
Mario 6 50
Tina 11 100
;
run;
%let range = 1;
/**********************START: SOLUTION USING PROC SQL***************************************/
proc sql;
create table test as
select
a.Name as A_Name
,a.Age as Age
,b.Age as Age
,a.Height as A_Height
,b.Height as B_Height
from
sample a
,sample b
where
b.Age between a.Age-&range. and a.Age+&range.
;
quit;
proc summary data=test nway;
class A_Name;
var B_Height;
output out=want_proc_sql mean= / autoname;
run;
proc delete data=test;
run;
/**********************END: SOLUTION USING PROC SQL***************************************/
/**********************START: SOLUTION USING PROC SQL***************************************/
proc sql;
create table want_proc_sql_2 as
select a.name
,a.Age
,(select mean(Height)
from sample
where Age between a.Age-&range and a.Age+&range)
as expected_Height
,a.Height
from sample a
;
quit;
/**********************END: SOLUTION USING PROC SQL***************************************/
/**********************START: SOLUTION USING DATA STEP***************************************/
data want_data_step;
set sample nobs = num ; /*num initialized and assigned number of observations (nobs)*/
loage = age - ⦥
hiage = age + ⦥
sumheight = 0; count = 0;
do p = 1 to num; /* for each observation in the new want data, these steps are executed NUM times*/
set sample(keep = age height rename=(height = ht age = age1)) point = p; /*p points at number of current observation (within the loop data set)*/
flag = (loage <= age1 <= hiage);
if flag > 0 then do;
sumheight + ht;
count + 1;
end;
end;
averageheight = sumheight / count;
drop age1 ht flag;
run;
/**********************END: SOLUTION USING DATA STEP***************************************/
/**********************START: SOLUTION USING Hash in Data Step***************************************/
data want_Hash;
if _n_ = 1 then do;
/*if 0 then set sample;*/
declare hash h(dataset:"sample", ordered: 'Y');
h.definekey('Age');
h.definedata('Height');
h.definedone();
end;
set sample;
lo_age = age - ⦥
hi_age = age + ⦥
sumheight = 0; count = 0;
do nage = lo_age to hi_age;
if h.find(key:nage) = 0 then do;
sumheight + height;
count + 1;
end;
end;
averageHeight = sumheight / count;
run;
/**********************END: SOLUTION USING Hash in Data Step***************************************/
/**********************RESULT***************************************/
proc sql;
create table Sample_Result as
select X1.Name
,X2.averageheight as Average_Data_Step
,X3.averageHeight as Average_Hash
,X4.B_Height_Mean as Average_Proc_Sql
,X5.expected_height as Average_Proc_Sql_2
from Sample X1
left join Want_data_step X2 on X1.Name=X2.Name
left join Want_hash X3 on X1.Name=X3.Name
left join Want_proc_sql X4 on X1.Name=X4.A_Name
left join Want_proc_sql_2 X5 on X1.Name=X5.Name
;quit;
/**********************RESULT***************************************/
... View more