- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Hi.
I have a dataset with two variables,one is ID which is not unique and the other is Scores.I want to create a new variable New_Score.
Here is the logic:
1)If the ID have the value of 12 or 18,then New_Score equal to 12 or 18;
2)If the ID have the value of 12 and also 18,then New_Score equal to 12;
3)If all the values of ID are less than 100,then New_Score equal to minimun value of ID;
4)If not all the values of ID are less than 100,then New_Score equal to minimun value of ID that are greater than or equal to 100;
For example:
data m01;
input ID Scores;
cards;
1 12
1 20
2 18
2 99
3 12
3 18
4 27
4 99
5 80
5 100
5 180
;
run;
The dataset I want is this:
1 12
2 18
3 12
4 27
5 100
.
I want to use HASH to create the dataset,but I do not how to do it.Please help me.Thanks in advance.
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
data foo;
input id _score @@;
cards;
1 12 1 20 2 18 2 99 3 12 3 18
4 27 4 99 5 80 5 100 5 180
;
run;
/* with hash */
data bar;
if 0 then set foo;
dcl hash foo(dataset:'foo',multidata:'y');
foo.definekey('id');
foo.definedata(all:'y');
foo.definedone();
dcl hash k(dataset:'foo',ordered:'y',duplicate:'r');
k.definekey('id');
k.definedata('id');
k.definedone();
dcl hiter iter('k');
rc=iter.first();
do while(rc=0);
rc=foo.find();
score=_score;
r=foo.find_next();
do while(r=0);
select;
when (_score ge 100 and score ge 100) score=min(score,_score);
when (_score ge 100 and score lt 100) score=max(score,_score);
when (_score eq 12 or score eq 12) score=12;
when (_score eq 18 or score eq 18) score=18;
otherwise score=min(score,_score);
end;
r=foo.find_next();
end;
output;
rc=iter.next();
end;
keep id score;
stop;
run;
/* with dow */
proc sort data=foo; by id _score; run;
data bar;
do until(last.id);
set foo;
by id;
select;
when(_score ge 100 and score ge 100) score=min(score,_score);
when(_score ge 100 and score lt 100) score=max(score,_score);
when(_score eq 12 or score eq 12) score=12;
when(_score eq 18 or score eq 18) score=18;
otherwise score=min(score,_score);
end;
end;
output;
drop _:;
run;
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Thanks!But the hash code can not run at SAS 9.1.3.
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Summy,
Hash objects in 9.1.3 cannot hold multiple data records per key. While there are still ways to do this on your version with hash, I would recommend taking one of the DOW style approaches myself and chang_y_chung demonstrated.
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
FriedEgg,
Thanks! I learn from yours and chang_y_chung's.]And find the way.
data m01;
input id score @@;
cards;
1 12 1 4 1 20 1 109 2 189 2 18 2 99 3 12 3 18 3 146 4 27 4 99 5 80 5 100 5 180 6 101 6 700 7 12 8 99
;
run;
data _NULL_;
length has12 has18 hasge100 min1 min2 new_score 8.;
if _N_=1 then do;
declare hash h(ordered:"a");
h.definekey("id");
h.definedata("id","has12","has18","hasge100","min1","min2","new_score");
h.definedone();
end;
set m01 end=last;
if h.find() ne 0 then do;
has12=0;has18=0;hasge100=0;
min1=9999;min2=9999;
new_score=0;
h.add();
end;
if h.find()=0 then Do;
if score=12 then has12=1;
if score=18 then has18=1;
if score ge 100 then hasge100=1;
/* h.replace();*/
max=max(score,max);
min1=min(score,min1);
if hasge100=1 then min2=min(score,min2);
new_score=min1;
if hasge100=1 then new_score=min2;
if has18=1 then new_score=18;
if has12=1 then new_score=12;
if has12=1 and has18=1 then new_score=12;
/* if has12=1 then new_score=12;*/
/* if has18=1 then new_score=18;*/
/* if has12=1 and has18=1 then new_score=12;*/
/* if hasge100=1 then new_score=min2;*/
/* else new_score=min1;*/
h.replace();
end;
if last then h.output(dataset:"m02");
run;
proc print data=m02(keep=id new_score) noobs;run;
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Well, here is another way. No hash, though.
/* sample data */
data one;
infile cards missover;
input id score @;
do while (not missing(score));
output;
input score @;
end;
cards;
1 12 20
2 18 99
3 12 18
4 27 99
5 80 100 180
;
run;
/* aggregate */
data two;
has12 = 0; has18 = 0; hasGE100 = 0;
min1 = 99999; min2 = 99999; /* larger than any possible score */
do until (last.id);
set one;
by id;
select;
when(score = 12) has12 = 1;
when(score = 18) has18 = 1;
when(100 <= score) hasGE100 = 1;
otherwise;
end;
min1 = min(score, min1);
if 100<=score then min2 = min(score, min2);
end;
select;
when (has12 and has18) new_score = 12;
when (has12) new_score = 12;
when (has18) new_score = 18;
when (hasGE100) new_score = min2;
otherwise new_score = min1;
end;
keep id new_score;
run;
/* check */
proc print data=two noobs;
run;
/* on lst
new_
id score
1 12
2 18
3 12
4 27
5 100
*/