BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
tjoones
Calcite | Level 5

Hi everybody,

I have a dataset that I need to transform in a number of ways, and I'm very much stuck on how to do it. If anybody can help though, it would be beyond awesome.

I have an example of what I have and what I would like to transform it into:

Have:

ID     QSN          VALUE
1      Prep            Zov
1      Prep            Pan
1      Prep            Dol
1      Strength      50
1      Strength      50
1      Strength      100
2      Prep            Zov
2      Prep            Zov
2      Prep            Zov
2      Strength      55
2      Strength      50
2      Strength      60

Want:

ID      Zov      Zov_Strength      Pan      Pan_Strength      Dol      Dol_Strength
1         1          50                      1           50                      1           100
2         3          60                      0           .                         0             .

To (try to)  put it in words:
1) When the QSN variable takes on the value "Prep", i need the value of the value variable to be a variable in the new dataset (example "Zov").
2) The value of this variable, should be the number of times it showed up under the ID.
3) When the QSN variable takes on the value "Strength", it corresponds to a value of Prep.
4) If there are more than one prep (example "Zov") and the strength variables are diffierent, the correct one should be the highest.
5) The data in the original data set is ordered so that the "prep"s come first, then the corresponding "Strength"s follow.

I hope this makes sense and if anybody can help me out, I would be incredibly grateful.

Best regards
Thomas

1 ACCEPTED SOLUTION

Accepted Solutions
MikeZdeb
Rhodochrosite | Level 12

hi .. another idea ...

data have;

input id qsn :$8. value :$3. @@;

datalines;

1      Prep         Zov    1      Prep         Pan 

1      Prep         Dol    1      Strength      50

1      Strength      50    1      Strength     100 

2      Prep         Zov    2      Prep         Zov

2      Prep         Zov    2      Strength      55

2      Strength      50    2      Strength      60

;

data x (drop=qsn);

merge have (where=(qsn eq 'Prep')) have (where=(qsn  ^eq 'Prep') rename=(value=temp));

by id;

run;

proc sql;

create table xx (drop=x) as

select id, value, value as x, count(*) as y from x

group by id, value

union

select id, catt(value,'_Strength') as value, value as x, max(input(temp,5.)) as y from x

group by id, value

order by id, x desc;

quit;

proc transpose data=xx out=want (drop=_name_);

by id;

var y;

id value;

run;

               Zov_               Pan_               Dol_

id    Zov    Strength    Pan    Strength    Dol    Strength

1     1        50        1        50        1        100

2     3        60        .         .        .          .

View solution in original post

6 REPLIES 6
Keith
Obsidian | Level 7

To start with, you need to restructure your dataset as it is very difficult to achieve your goal with the present structure.  You should have 2 columns, 'Prep' and 'Strength', containing the relevant values,  instead of the QSN and Value columns.  It will then be much easier to summarise.

The code below gives you output identical to yours, however I suggest if you have more than the 3 value types in your example then you wrap the calculation part inside a macro.

data have1;

input ID QSN $ VALUE $;

cards;

1      Prep            Zov

1      Prep            Pan

1      Prep            Dol

1      Strength      50

1      Strength      50

1      Strength      100

2      Prep            Zov

2      Prep            Zov

2      Prep            Zov

2      Strength      55

2      Strength      50

2      Strength      60

run;

/* restructure dataset */

data temp1;

set have (where=(qsn='Prep'));

rename value=prep;

drop qsn;

run;

data temp2;

set have (where=(qsn='Strength'));

strength=input(value,best12.);

drop qsn value;

run;

data have2;

merge temp1 temp2;

by id;

run;

/* summarise data */

data want;

set have2;

by id;

if first.id then call missing(zov,zov_strength,pan,pan_strength,dol,dol_strength);

retain zov_strength pan_strength dol_strength;

      zov+prep='Zov';

      if prep='Zov' and strength>zov_strength then zov_strength=strength;

      pan+prep='Pan';

      if prep='Pan' and strength>pan_strength then pan_strength=strength;

      dol+prep='Dol';

      if prep='Dol' and strength>dol_strength then dol_strength=strength;

if last.id then output;

drop prep strength;

run;

Alpay
Fluorite | Level 6

data x;

input ID   QSN $  VALUE $;

datalines;

1 Prep Zov

1 Prep Pan

1 Prep Dol

1 Strength 50

1 Strength 50

1 Strength 100

2 Prep Zov

2 Prep Zov

2 Prep Zov

2 Strength 55

2 Strength 50

2 Strength 60

;

run;

data want;

length ID 8;

array p[1:3] 8 Zov Pan Dol;

array s[1:3] 8 ;

array st[1:3] 8 Zov_Strength Pan_Strength Dol_Strength;

call missing(of Zov_: Pan_: Dol_: s:);

do idx=1 to 3;

   p[idx]=0;

end;

idx = 0;

do until(last.ID);

  set x;

  by ID;

    idx + 1;

    if Idx = 4 then Idx = 1;

    if QSN = 'Prep' then do;

     if Value = 'Zov' then do;Zov + 1;_Zov_Idx = Idx;end;

     else if Value = 'Pan' then do;Pan + 1;_Pan_Idx = Idx;end;

     else if Value = 'Dol' then do;Dol + 1;_Dol_Idx = Idx;end;

    end;

    else if QSN = 'Strength' then do;

     s[Idx] = INPUT(Value,best12.);

    end;

end;

if Zov = 3 then do;CALL SORTN(of s:);Zov_Strength = s[3];end;

else if Pan = 3 then do;CALL SORTN(of s:);Pan_Strength = s[3];end;

else if Dol = 3 then do;CALL SORTN(of s:);Dol_Strength = s[3];end;

else do;

  Zov_Strength = s[_Zov_Idx];

  Pan_Strength = s[_Pan_Idx];

  Dol_Strength = s[_Dol_Idx];

end;

keep Zov: Pan: Dol: ID;

run;

MikeZdeb
Rhodochrosite | Level 12

hi .. another idea ...

data have;

input id qsn :$8. value :$3. @@;

datalines;

1      Prep         Zov    1      Prep         Pan 

1      Prep         Dol    1      Strength      50

1      Strength      50    1      Strength     100 

2      Prep         Zov    2      Prep         Zov

2      Prep         Zov    2      Strength      55

2      Strength      50    2      Strength      60

;

data x (drop=qsn);

merge have (where=(qsn eq 'Prep')) have (where=(qsn  ^eq 'Prep') rename=(value=temp));

by id;

run;

proc sql;

create table xx (drop=x) as

select id, value, value as x, count(*) as y from x

group by id, value

union

select id, catt(value,'_Strength') as value, value as x, max(input(temp,5.)) as y from x

group by id, value

order by id, x desc;

quit;

proc transpose data=xx out=want (drop=_name_);

by id;

var y;

id value;

run;

               Zov_               Pan_               Dol_

id    Zov    Strength    Pan    Strength    Dol    Strength

1     1        50        1        50        1        100

2     3        60        .         .        .          .

Haikuo
Onyx | Level 15

Mike's first step of transformation is crucial to dynamic solution, after that, the task is mechanical.  Here is another approach using Hash():

data have;

input (ID     QSN          VALUE) (:$);

cards;

1      Prep            Zov

1      Prep            Pan

1      Prep            Dol

1      Strength      50

1      Strength      50

1      Strength      100

2      Prep            Zov

2      Prep            Zov

2      Prep            Zov

2      Strength      55

2      Strength      50

2      Strength      60

;

data have;

merge have (where=(qsn='Prep')) have (rename=(qsn=_qsn value=_value) where=(_qsn='Strength'));

by id;

run;

proc sql;

select distinct value into :value separated by ' ' from have;

quit;

    %macro exe;

    %do i=1 %to %sysfunc(countw(&value));

    %let temp= %scan(&value,&i);

    _rc=h.find(key:id, key:"&temp.");

    &temp.=0;

           do while (_rc=0);

               &temp.=&temp.+1;

            &temp._strength=ifn(&temp._strength>_value,&temp._strength, _value);

            _rc=h.find_next(key:id,key:"&temp.");

            end;

      %end;

      %mend;

data want;

    if _n_=1 then do;

       if 0 then set have;

       dcl hash h(dataset:'have', multidata:'y');

       h.definekey('id','value');

       h.definedata(all:'y');

       h.definedone();

    end;

    set have (keep=id);

    by id;

    if first.id;

      %exe

      drop qsn value _:;

    run;

proc print;run;

Haikuo

Ksharp
Super User

Assuming Prep and Strength are exactly matched.

data have1;
input ID QSN $ VALUE $;
cards;
1      Prep            Zov
1      Prep            Pan
1      Prep            Dol
1      Strength      50
1      Strength      50
1      Strength      100
2      Prep            Zov
2      Prep            Zov
2      Prep            Zov
2      Strength      55
2      Strength      50
2      Strength      60
;
run;
data temp;
 merge have1(where=(qsn='Prep')) have1(where=(_qsn='Strength') rename=(qsn=_qsn value=_value));
run;
proc sql;
create table temp1 as
 select *,count(*) as freq from temp group by id,qsn,value,_qsn having  _value=max(_value);
quit;
proc transpose data=temp1 out=want1(drop=_name_);
 by id;
 id  value _qsn;
 var _value;
run;
proc transpose data=temp1 out=want2(drop=_name_);
 by id;
 id  value;
 var freq;
run;
data want;
 merge want1 want2;
 by id;
run;


Ksharp

Message was edited by: xia keshan

tjoones
Calcite | Level 5

Thanks for the help everybody. It's awesome to get help like this when I'm stuck. I truly appreciate it.

hackathon24-white-horiz.png

The 2025 SAS Hackathon has begun!

It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.

Latest Updates

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 6 replies
  • 2135 views
  • 11 likes
  • 6 in conversation