data Main;
length RefID $ 7 Default 8 Question1 8 Question2 8 Question3 8 Qual 8;
infile datalines missover dlm=",";
input RefID Default Question1 Question2 Question3 Qual;
datalines;
RefID1,0,3,2,2,4
RefID2,0,3,1,3,4
RefID3,1,3,1,3,4
RefID4,0,3,3,3,4
RefID5,0,3,2,1,4
RefID6,1,3,2,3,4
RefID7,0,3,1,2,4
RefID8,1,3,2,3,4
RefID9,0,2,3,4,4
RefID10,0,3,1,4,4
RefID11,1,3,2,2,4
RefID12,1,2,3,2,4
RefID13,0,3,3,3,4
RefID14,0,3,2,4,4
;
run;
data Change;
length Variable $ 20 Want 8 Current 8;
input Variable $ Want Current;
datalines;
Question2 1 2
Question3 1 4
;
run;
data _null_;
set Change;
if _n_ = 1 then call execute('Proc sql;');
call execute(catx(" ", 'update Main set', Variable, '=', Want, 'where', Variable, '=', Current, ';'));
run;
PROC SORT DATA=Main;
BY Question1;
RUN;
proc sql;
create table Main2 as
SELECT
NT.*,
SUM(NT.ObservationNumber) AS TotalObservationNumber,
(NT.ObservationNumber/Calculated TotalObservationNumber) AS NumericRate,
(NT.DefaultNumber/NT.ObservationNumber) as DefaultRate
from
(
select
Question1 as Category,
COUNT(Question2) AS ObservationNumber,
Sum(Default) AS DefaultNumber
from work.Main group by Question1) AS NT;
quit;
proc sql;
Create table work.Main3 as
Select
*,
sqrt((2500/sum((N.YDSum**2)*N.NumericRate))) AS F,
(N.DefaultRate-N.Y) * Calculated F AS Score
FROM(SELECT
NumericRate,
DefaultRate,
(NumericRate*DefaultRate) AS YRow,
SUM(NumericRate*DefaultRate) AS Y,
(SUM(NumericRate*DefaultRate)-DefaultRate) AS YDSum from work.Main2) as N;
QUIT;
I want to have Score columns next to them as well.
Hello @Reeza @ballardw thank you for helping me 🙂 I'm trying to learn 🙂
Actually i solve my problem by writing code below i used simple data. But part of hash codes i'm little bit confused. Is it possible to write different code instead of hash codes. Because i need two data sets to join new & old tables and hash code generated two tables.
data SASLIB.SampleBackTest;
length RefID $12. Default 8 Soru1 8 Soru2 8 Soru3 8;
infile datalines missover dlm=",";
input RefID Default Soru1 Soru2 Soru3;
datalines;
RefID1,0,3,2,2
RefID2,0,3,1,3
RefID3,1,3,1,3
RefID4,0,3,3,3
RefID5,0,3,2,1
RefID6,1,3,2,3
RefID7,0,3,1,2
RefID8,1,3,2,3
RefID9,0,2,3,1
RefID10,0,3,1,4
RefID11,1,3,2,2
RefID12,1,2,3,2
RefID13,0,3,3,3
RefID14,0,3,2,4
;
run;
data SASLIB.DegisenSorular;
if _n_ eq 1 then do;
if 0 then set SASLIB.DegisecekSorular;
declare hash ha(dataset:'SASLIB.DegisecekSorular');
ha.definekey('variable','Mevcut');
ha.definedata('Istenilen');
ha.definedone();
end;
set SASLIB.SampleBackTest ;
array x{*} Soru1-Soru3;
do k=1 to dim(x);
variable=vname(x{k});
Mevcut=x{k};
if ha.find()=0 then x{k}=Istenilen;
end;
drop k variable Istenilen Mevcut;
run;
%macro vars(dsn,chr,out);
%let dsid=%sysfunc(open(&dsn));
%let n=%sysfunc(attrn(&dsid,nvars));
data &out;
set &dsn(rename=(
%do i = 3 %to &n;
%let var=%sysfunc(varname(&dsid,&i));
&var=&chr&var
%end;));
%let rc=%sysfunc(close(&dsid));
run;
%mend vars;
/** First parameter is the data set that contains all of the variables. **/
/** Second parameter is the characters used for the prefix. **/
/** Third parameter is the new data set that contains the new variables. **/
%vars(saslib.samplebacktest,O_,saslib.samplebacktest_RNM)
PROC SORT DATA=SASLIB.DegisenSorular;
BY RefID Soru:;
RUN;
PROC SORT DATA=SASLIB.SampleBackTest_RNM;
BY RefID O_Soru:;
RUN;
data SASLIB.AllTogether;
merge SASLIB.DegisenSorular
SASLIB.SampleBackTest_RNM;
;
by RefID;
run;
data SASLIB.results;
length
soru 8
cevap 8
AdetselOran 8
DefaultOrani 8
YRow 8
Y 8
YDSum 8
F 8
Score 8
;
run;
%macro soru(n);
%do i=1 %to &n;
proc sql;
create table SASLIB.SampleBackTest&i as
SELECT
&i as soru,
soru&i as cevap,
GozlemAdedi,
DefaultAdedi,
SUM(NT.GozlemAdedi) AS ToplamGozlemAdedi,
(NT.GozlemAdedi/Calculated ToplamGozlemAdedi) AS AdetselOran,
(NT.DefaultAdedi/NT.GozlemAdedi) as DefaultOrani
from
(
select
Soru&i,
COUNT(Soru&i) AS GozlemAdedi,
Sum(Default) AS DefaultAdedi
from SASLIB.DegisenSorular group by Soru&i) AS NT;
quit;
proc sql;
Create table work.finalResult&i as
Select
n.*,
sqrt((2500/sum((N.YDSum**2)*N.AdetselOran))) AS F,
(N.DefaultOrani-N.Y) * Calculated F AS Score
FROM(SELECT
soru,
cevap,
AdetselOran,
DefaultOrani,
(AdetselOran*DefaultOrani) AS YRow,
SUM(AdetselOran*DefaultOrani) AS Y,
(SUM(AdetselOran*DefaultOrani)-DefaultOrani) AS YDSum
from SASLIB.SampleBackTest&i) as N, SASLIB.SampleBackTest&i z
where n.DefaultOrani=z.DefaultOrani
and n.soru=z.soru
and n.cevap=z.cevap;
QUIT;
PROC APPEND BASE=SASLIB.results
DATA=finalResult&i;
RUN;
data SASLIB.results;
set SASLIB.results;
where Soru ne .;
run;
PROC SQL;
CREATE TABLE AddedScore&i AS
SELECT
a.REFID,
score as Score&i
FROM SASLIB.AllTogether a, SASLIB.results b
where b.soru=&i
and a.Soru&i=b.cevap
;
QUIT;
PROC SQL;
CREATE TABLE SASLIB.AllTogether AS
SELECT t1.*,
t2.Score&i
FROM SASLIB.AllTogether t1
INNER JOIN WORK.AddedScore&i t2 ON (t1.RefID = t2.RefID);
QUIT;
proc delete data=finalResult&i; run;
proc delete data=firstResult&i; run;
proc delete data=AddedScore&i; run;
%end;
%mend;
%soru(3);
Hi,
As Reeza has mentioned, you would be far better off changing the strcuture of your data to a normalised format:
QUESTION RESPONSE
Please provide, a simple datastep with some test data, and an example of what you want the output to look like. Virtually half that code you post there seems redundant.
Hello,
I will consider your suggestions. I have a pre-created hash code, I try to create in another way because I don't know its meaning. Actually, I haven't created a hash code. I could manage to finish the process using the code below. Can you help me about the image I created on the top while scoring or can you direct me to something or somewhere else?
proc sql noprint;
select Variable into : list separated by ' '
from Change;
quit;
%macro ques(what);
proc sql;
update Main set &what=(select Want from Change
where Variable="&what")
where &what=(select Current from Change
where Variable="&what");
quit;
%mend;
%macro loop;
%let i=1;
%do %while(%scan(&list.,&i.,%STR( ))~=);
%ques(%scan(&list.,&i.,%STR( )));
%let i=&i.+1;
%end;
%mend;
%loop;
Thank you.
I would be happy if someone could help in this case.
Thank you.
Please see my previous post for further information:
Please provide, a simple datastep with some test data, and an example of what you want the output to look like. Virtually half that code you post there seems redundant.
Hello RW9,
Actually, this is a sample data , the original version of the data is 4000 columns. What are the part you think ate extra or missing? I am putting the code below again and i will be writing what i do and what i need to do . The question has got a bit long in the forum 😞 Iwill put below the images that i want to be created.
/*in these three steps, the values in the Want table overwrite the values the Main_sql table.*/
proc sql noprint;
select Variable into : list separated by ' '
from Change;
quit;
%put &list;
%macro soru(var);
proc sql;
update Main set &var=(select Want from Change
where Variable="&var")
where &var=(select Current from Change
where Variable="&var");
quit;
%mend;
%macro loop;
%let i=1;
%do %while(%scan(&list.,&i.,%STR( ))~=);
%soru(%scan(&list.,&i.,%STR( )));
%let i=&i.+1;
%end;
%mend;
%loop;
/* I do the scoring from here to main3 but i have done this process for only Question2 Column.
I also would like to see the Question2 column next to the score rows.
In the end, i want all these scores to be seen as Score1 in main.
The actual problem is uniting the values in the Question1 in the main_sql table
and Main the Question1 after want table overwritin the main table in the same table. */
PROC SORT DATA=Main;
BY Question2;
RUN;
proc sql;
create table Main2 as
SELECT
NT.*,
SUM(NT.ObservationNumber) AS TotalObservationNumber,
(NT.ObservationNumber/Calculated TotalObservationNumber) AS NumericRate,
(NT.DefaultNumber/NT.ObservationNumber) as DefaultRate
from
(
select
Question2 as Category,
COUNT(Question2) AS ObservationNumber,
Sum(Default) AS DefaultNumber
from Main group by Question2) AS NT;
quit;
proc sql;
Create table Main3 as
Select
*,
sqrt((2500/sum((N.YDSum**2)*N.NumericRate))) AS F,
(N.DefaultRate-N.Y) * Calculated F AS Score
FROM(SELECT
NumericRate,
DefaultRate,
(NumericRate*DefaultRate) AS YRow,
SUM(NumericRate*DefaultRate) AS Y,
(SUM(NumericRate*DefaultRate)-DefaultRate) AS YDSum from Main2) as N;
QUIT;
This is the order of the scores. I would like the final image of the table like this. (Score Columns result is not correct)
Can you explain WHAT you are trying to do? For example, to me it looks like you are trying to score an instrument where the score used for some of the responses is different than the value recorded for those responses. For example you might have questions with answer values from 1 to 5 and some of the questions 5 is "good" response and for others "1" is the good response. So to create a total score over all of the questions you first need to modify the scores before adding. So in this case your first table is the users responses to the questions and the second table are the rules for how to modify the individual question responses so that a total score can be calculated.
Hello Tom,
Actually you have understood most of what I am trying to generate but i haven’t realized why it is so important. It is not related to the instrument. My task is related to the banking. As you said, there are some questions users answered which are orjinal and there are some questions which we want to change the answers .After that the scoring is done. It seems it is not possible to change scoring which i don't understand well. What did you mean about modify the score part? I'm trying to solve how i can create Variable names which start with Question in the Main table as row values in the Main3 table. I thought of the vname function but i couldn't handle it. At the result of Main table for every RefId and Variable columns , score values will be different. These parts are a little bit confusing. I hope i could inform you as you want
Thank you.
That helps. So what are the rules for making the scores? Can they be written in a way that means the scoring can be done with the data in a vertical structure (similar to your second dataset in this question)? It is much easier to deal with this type or problem with the data in that form. You could always re-create the multiple column version of the data as a report if you want.
So if your response data is in this form, that is you have some type of identifier variable that will uniquely identify the rows (ID) and one or more variables with the answers to the questions (question1-question4).
data responses;
input id $ question1-question4 ;
cards;
1 1 2 3 4
2 2 3 4 5
3 5 4 3 2
;;;;
And your transformation rules are in this form. That is you have one row per question id (_NAME_) and response value (OLD) and one variable that indicate the transformed value (NEW).
data rules ;
length _name_ $32 ;
input _name_ old new ;
cards;
QUESTION1 1 4
QUESTION2 3 1
;;;;
Then you can combine them by transposing the repsonses and merging with the rules.
proc transpose data=responses out=vertical (rename=(col1=OLD)) ;
by id ;
var question1 - question4 ;
run;
proc sql noprint ;
create table new_responses as
select a.id
, a._name_
, a.old
, coalesce(b.new,a.old) as new
from vertical a
left join rules b
on upcase(a._name_) = upcase(b._name_)
and a.old = b.old
order by 1,2
;
quit;
Now if your "SCORE" function is just to take the sum of the NEW transformed value then you can use many techniques to create the sum. For example PROC SUMMARY.
proc summary data=new_responses nway ;
by id ;
var new ;
output out=scores sum=score ;
run;
Now you could add these back into the vertical table as new rows .
proc transpose data=score out=vertical_score (rename=(col1=NEW));
by id ;
var score ;
run;
data new_vertical ;
set vertical vertical_score ;
by id _name_;
run;
You can then transform this back into a wide table if you want .
proc transpose data=new_vertical out=new_wide ;
by id;
id _name_;
var new ;
run;
If your scoring rules are more complex, but can be reduced to the sum of factor weights for each question response then you can also use this method. You might even be able to use PROC SCORE to calculate your scores if you set it up right.
Hello Tom,
Thank you for your detailed information. I couldn't solve my question yet. I would like to ask is it possible to add string value when we rename the column name on the Proc Transpose procedure. I want to add O character all of the Question columns like "O_Question1" by using code as below which you wrote.
proc transpose data=New_Responses out=new_wide ;
by id;
id _name_;
var new;
run;
Thank you.
Can.
The PROC TRANSPOSE step
proc transpose data=New_Responses out=new_wide ;
by id;
id _name_;
var new;
run;
is using the value in the variable _NAME_ to set the names of the variable in the new dataset.
So just add another step before the PROC TRANSPOSE to change the values of the _NAME_ variable.
data New_Responses;
set New_Responses ;
_name_ = 'O_' || _name_;
run;
Hi Tom,
Out of interest would the prefix option also do the same?:
proc transpose data=New_Responses out=new_wide prefix=o_;
by id;
id _name_;
var new;
run;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.