Here is a sample of my data:
[pre]
id,A_a01,B_a01,D_a01,A_a02,C_a02,D_a02,A_a03,B_a03,C_a03,A_a04,B_a04,D_a04,A_a05,B_a05,C_a05,A_a06,B_a06,D_a06,B_a07,C_a07,D_a07,A_a08,B_a08,C_a08,A_a09,B_a09,D_a09,A_a10,C_a10,D_a10,A_a11,B_a11,D_a11,A_a12,C_a12,D_a12,A_a13,B_a13,C_a13,A_a14,B_a14,C_a14,A_a15,B_a15,C_a15,A_a16,B_a16,D_a16,A_a17,C_a17,D_a17,A_a18,B_a18,C_a18,A_a19,C_a19,D_a19,A_a20,B_a20,D_a20,A_a21,B_a21,D_a21,A_a22,B_a22,D_a22,A_a23,B_a23,C_a23,A_a24,B_a24,C_a24,A_a25,B_a25,C_a25,B_a26,C_a26,D_a26,B_a27,C_a27,D_a27,A_a28,B_a28,D_a28,A_a29,B_a29,C_a29,B_a30,C_a30,D_a30,A_a31,B_a31,C_a31,A_a32,B_a32,C_a32,B_a33,C_a33,D_a33,A_a34,B_a34,C_a34,A_a35,B_a35,D_a35,A_a36,C_a36,D_a36,A_a37,B_a37,C_a37,A_a38,C_a38,D_a38,A_a39,B_a39,C_a39,B_a40,C_a40,D_a40,A_a41,B_a41,D_a41,A_a42,C_a42,D_a42,A_a43,B_a43,D_a43,B_a44,C_a44,D_a44,A_a45,C_a45,D_a45,A_a46,B_a46,C_a46,A_a47,B_a47,D_a47,A_a48,B_a48,C_a48,A_a49,B_a49,D_a49
18745,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0
50619,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0
65357,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
77970,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0
107881,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
122719,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
124743,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
144105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
157495,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1
[/pre]
I need the count of how many times each item is chosen together with every other item, across all students. This led me to use the following code:
[pre]
/* Build data set A: a copy of DataMatrix with every item column duplicated
   into y_1-y_147, so that each y_ variable can later be used as a BY
   variable when tabulating the original item columns. */
data a;
  set DataMatrix;
  /* x maps onto the item columns in their stored order (A_a01 ... D_a49). */
  array x[147] A_a01--D_a49;
  /* y_ creates new numeric variables y_1 ... y_147. */
  array y_[147];
  /* Arrays must be referenced element by element; an unsubscripted
     "y_=x" is an illegal array reference. The loop index i is kept in
     the output data set, as in the original step. */
  do i=1 to dim(x);
    y_[i]=x[i];
  end;
run;
%macro kf1m(dsn=a, nvars=147);
  /* For each copied item y_1 ... y_&nvars in &dsn, tabulate every original
     item column (A_a01--D_a49) by that copy with %mfreq, keep the rows where
     both the copy and the tabulated item equal 1, and stack the results in
     WORK.OUT with the columns from / to / count.

     Parameters (both optional; the defaults reproduce the original call):
       dsn   - input data set holding the item columns and the y_ copies.
       nvars - number of y_ copies to loop over.

     WORK.OUT is rebuilt from scratch on every invocation. */
  %local i;
  %do i=1 %to &nvars;
    %mfreq(&dsn, _temp0_, A_a01--D_a49, y_&i, weight=);

    data _temp1_ (keep= to from count);
      format from $5.;
      set _temp0_;
      /* Both items chosen: the BY value and the tabulated value are 1. */
      where y_&i=1 and nvalue=1;
      rename _var_=to;
      /* Program statements must use the pre-rename name; the label
         travels with the variable when it is renamed to "to". */
      label _var_="To";
      From="y_&i";
    run;

    /* On the first pass OUT does not exist yet (or holds results of an
       earlier run), so start it from _temp1_ alone and append afterwards. */
    data out;
      format from to count;
      set %if &i > 1 %then out; _temp1_;
    run;
  %end;
%mend kf1m;
[/pre]
I am sure there is an easier way to do this, especially considering my total sample is 20,000 students. I also need to be able to run this for each school and district separately, so a macro is essential...
Below I'm posting the macros I call within my macro:
MFREQ:
[pre]
* This macro computes freqs for multiple variables;
* varlist can be a conventional SAS variable list or a Perl regular expression;
* dsn = input data set;
%Macro Mfreq(dsn, outfile, varlist, bylist, weight=);
  /* Compute one-way frequencies for every variable in a variable list and
     stack them into a single output data set.

     Parameters:
       dsn     - input data set.
       outfile - output data set; one row per (BY group, variable, value)
                 with columns: the BY variables, _Var_ (variable name),
                 Nvalue and/or Cvalue (the value), Count and Percent.
       varlist - variable list, resolved to names by the Parse macro.
       bylist  - optional BY variables for PROC FREQ.
       weight= - optional weight variable for PROC FREQ.

     Work data sets _tmp1_, _tmp2_ and _tmp3_ are overwritten.
     If the variable list cannot be parsed the macro exits without
     creating the output data set. */
  %local vnames v v1 num namelen varlen len1 len2 type typelist Nlist Clist i
         dsid rc weight_statement;

  %if &weight = %then %let weight_statement=;
  %else %let weight_statement=%str(weight &weight;);

  /* parse &varlist */
  /* If an error occurs, exit the macro */
  %let vnames = %parse(&dsn, &varlist, _NV_);
  %if &vnames= %then %goto Exit;

  /* Read the data into the work directory and discard unneeded vars to
     speed up the program */
  data _tmp1_; set &dsn (keep=&vnames &bylist &weight); run;

  %let dsid=%sysfunc(open(_tmp1_));
  /* Without a valid handle the varnum/varlen calls below cannot work. */
  %if not &dsid %then %do;
    %put %sysfunc(sysmsg());
    %goto Exit;
  %end;

  /* namelen  = maximum length of the variable names.
     varlen   = maximum value of the variable lengths.
     type     = C for character or N for numeric.
     typelist = type list corresponding to vars vnames (e.g., typelist=NCCN...).
     Clist    = type list for character vars.
     Nlist    = type list for numeric vars.
  */
  %let v1=%scan(&vnames, 1);
  %let namelen=%length(&v1);
  %let varlen=%sysfunc(varlen(&dsid, %sysfunc(varnum(&dsid, &v1))));
  %do i=1 %to &_NV_;
    %let v=%scan(&vnames, &i);
    %let num=%sysfunc(varnum(&dsid, &v));
    %let type=%sysfunc(vartype(&dsid, &num));
    %let typelist=&typelist&type;
    %if &type=N %then %let Nlist=&Nlist&type;
    %else %let Clist=&Clist&type;
    %let len1=%length(&v);
    %let len2=%sysfunc(varlen(&dsid, &num));
    %if &namelen < &len1 %then %let namelen=&len1;
    %if &varlen < &len2 %then %let varlen=&len2;
  %end;
  %let rc= %sysfunc(close(&dsid));

  /* PROC FREQ with a BY statement needs the input sorted by the BY vars. */
  %if %scan(&bylist,1) ^= %then %do;
    proc sort data=_tmp1_; by &bylist; run;
  %end;

  /* Initialize output file:
     1. _Var_ character variable to hold the names of variables.
     2. If &Nlist ^= null, the varlist contains numeric variables.
        Create a numeric variable Nvalue to hold the values of these variables.
     3. If &Clist ^= null, the varlist contains character variables.
        Create a character variable Cvalue to hold the values of these variables.
     4. Count = frequencies of the variables.
     5. No observations are created.
  */
  data &outfile;
    format _Var_ $&namelen..;
    %if &Nlist ^= %then %do;
      length Nvalue 8.;
    %end;
    %if &Clist ^= %then %do;
      length Cvalue $&varlen;
    %end;
    length Count Percent 8.;
    if _N_=0;
  run;

  /* Inside the loop:
     1. Extract each variable from &vnames.
     2. Compute the frequency for the extracted variable using proc freq
        and save the output to file _tmp2_.
     3. Rename the variable as "Cvalue" or "Nvalue" depending on its type
        (result in _tmp3_).
     4. Append the file _tmp3_ to the final output file.
  */
  %do i=1 %to &_NV_;
    %let v=%scan(&vnames, &i);

    /* Compute frequency for &v */
    proc freq data=_tmp1_ noprint;
      tables &v/out=_tmp2_;
      by &bylist;
      &weight_statement
    run;

    data _tmp3_; set _tmp2_;
      _Var_ ="&v";
      %let type=%substr(&typelist, &i, 1);
      %if &type = C %then %do;
        rename &v = Cvalue;
        label &v = 'Character Values';
      %end;
      %else %do;
        rename &v = Nvalue;
        label &v = 'Numeric Values';
      %end;
    run;

    data &outfile; format &bylist _Var_; set &outfile _tmp3_; run;
  %end;

  /* _NV_ was created as a global by the Parse call above; remove it. */
  %symdel _NV_;
%Exit:
%Mend Mfreq;
[/pre]
PARSE:
[pre]
/******************************************************************************************
MACRO FUNCTION: %Parse
AUTHOR: Jimmy Z. Zou
CREATED: 7/1/2003
MODIFIED: 5/30/2006 to make it work with SAS Perl regular expressions.
DESCRIPTION:
This macro function is used to parse a variable list for any given SAS data set. The variable list
is either a conventional SAS variable list (like those used in data steps and proc steps) or a
Perl regular expression.
The function returns a complete list of variable names corresponding to
the variable list. If an error occurs, the function returns a missing value (null character). Optionally,
the function saves the number of variables in the list to a global variable that is specified by
the user.
SYNTAX:
%Parse(dsn, varlist<,nvars>)
dsn - a SAS data set name.
varlist - a variable list to be parsed, in either one of the following two forms:
(1) A conventional SAS variable list such as
varlist = cd b03-b50 t -- f xy: _NUMERIC_ r-character-z
(2) A Perl regular expression using /.../ as delimiter such as varlist=/^xy/.
Note: The two forms are not allowed to be mixed together in the same variable list.
nvars - optional parameter to specify the name of a global variable to hold the number of variables
returned by the function.
Examples:
data Example;
length name $10 sex $1;
length ID age month1-month5 b001-b020 8.;
length State region ck1-ck5 $2;
run;
* Parse conventional variable lists;
%let varlist=month3-month5 b006-b010 name region--check2 s:;
%put The variable names are: %Parse(Example, &varlist, n);
%put The number of variables in the list is: &n;
%put All variables in the data set are: %Parse(Example, _ALL_);
%put All numeric variables in the data set are: %Parse(Example, _NUMERIC_);
%put All character variables between age and region are: %Parse(Example, age-character-region);
* Parse Perl regular expressions;
%put The variables starting with s are (case sensitive): %parse(Example, /^s/);
%put The variables starting with s (case insensitive) are: %parse(Example, /^s/i);
%put The variables ending with e are: %parse(Example, /e$/);
%put The variables containing a digit 2 are: %parse(Example, /2/);
%put Variables that has two consecutive digits: %parse(Example, /\d\d/);
%put Variables that has two consecutive non_digits(letters or underscores): %parse(Example, /\D\D/);
%put Variables with a length of 3: %parse(Example, /^...\b/);
%put Variables with a length of 3: %parse(Example, /^.{3}$/);
******************************************************************************************/
%Macro Parse(dsn, varlist, nvars);
  /* Macro function: resolves &varlist against data set &dsn and returns the
     matching variable names separated by blanks. Returns nothing (null) if
     the data set cannot be opened or the list is invalid. When &nvars is
     given and parsing succeeds, a global macro variable of that name
     receives the number of names returned.

     Exit paths:
       Exit - used when the data set could not be opened (nothing to close).
       Fail - used for every error after a successful open; closes the
              data set handle before leaving so the handle is not leaked. */
  %local i j k _n_ word upword count d p q p1 p2 name name1 name2 suffix1 suffix2
         prefix namelist dsid rc type len jj pid;

  %let dsid=%sysfunc(open(&dsn));
  %if not &dsid %then %do;
    %put %sysfunc(sysmsg());
    %goto Exit;
  %end;

  /* Get the total number of variables in dsn */
  %let _n_=%sysfunc(attrn(&dsid, nvars));
  %let count=0;

  /* If varlist is not a Perl regular expression... */
  %if not %index(&varlist, /) %then %do;
    /*
    Standardize the varlist:
    Removing extra blanks in the varlist and group the variables into words.
    For example, if the input varlist is
    varlist = cd b003 - b152 t -- f xy: _NUMERIC_ r -character- z
    After the following steps, it will become
    varlist = cd b003-b152 t--f xy: _NUMERIC_ r-character-z
    */
    /* %let varlist=%cmpres(&varlist);*/
    %let varlist=%sysfunc(compbl(&varlist));
    %let varlist=%sysfunc(tranwrd(&varlist, %str( )-, -));
    %let varlist=%sysfunc(tranwrd(&varlist, -%str( ), -));

    /*
    Divide and Conquer:
    Set up a loop to extract the words in varlist one by one.
    Then parse each word to get the variable names.
    */
    %let i=1;
    %do %until (%qscan(&varlist, &i, %str( ))=%str());
      %let word=%qscan(&varlist, &i, %str( ));
      %let upword=%upcase(&word);
      %let p=%index(&word, --);

      /* Parse a word like t--f (name range variable list):
      1. Extract the beginning and ending variable names (name1 and name2 in the code below).
      2. Get the position numbers for name1 and name2 (p1 and p2 in the code below).
      3. Check errors.
      4. Update the counter (count)
      5. Get the names of the variables between name1 and name2:
         %sysfunc(varname(&dsid, j)) p1<= j <=p2
      6. Add the names to the namelist using a loop.
      */
      %if &p %then %do;
        %let name1=%substr(&word, 1, &p-1);
        %let name2=%substr(&word, &p+2);
        %let p1=%sysfunc(varnum(&dsid,&name1));
        %let p2=%sysfunc(varnum(&dsid,&name2));
        %if &p1=0 | &p2=0 | (&p1 > &p2) %then %do;
          %put ERROR: Invalid variable list &word;
          %goto Fail;
        %end;
        %let count=%eval(&count+&p2-&p1+1);
        %do j=&p1 %to &p2;
          %let namelist=&namelist %sysfunc(varname(&dsid, &j));
        %end;
      %end;

      /* Parse a word like ab: (name prefix variable list):
      1. Extract the prefix.
      2. Using a loop to compare the prefix with each variable name in the data set.
      3. If a name matches the prefix, add it to the namelist.
      */
      %else %if %index(&word, :) %then %do;
        %let prefix=%sysfunc(compress(&word, :));
        %do j=1 %to &_n_;
          %let name=%sysfunc(varname(&dsid, &j));
          %if (%length(&name) >= %length(&prefix)) & (%sysfunc(compare(&prefix, &name, :i))=0) %then %do;
            %let count=%eval(&count + 1);
            %let namelist=&namelist &name;
          %end;
        %end;
      %end;

      /* Parse special SAS Name lists: _ALL_, _NUMERIC_, or _CHARACTER_:
      Loop through all the variables in the data set dsn and select the names based on
      the special keywords:
      1. _ALL_ - select all variables.
      2. _NUMERIC_ - select all numeric variables. %sysfunc(vartype(&dsid, &j))=N if the jth
         variable is numeric, and %sysfunc(vartype(&dsid, &j))=C if character.
      3. _CHARACTER_ - select all character variables.
      */
      %else %if %sysfunc(indexw(_ALL_ _NUMERIC_ _CHARACTER_, &upword)) %then %do;
        %do j=1 %to &_n_;
          %if &upword=_ALL_ %then %do;
            %let namelist=&namelist %sysfunc(varname(&dsid, &j));
            %let count=%eval(&count + 1);
          %end;
          /* The second character of the keyword (N or C) is the type code. */
          %else %if %sysfunc(vartype(&dsid, &j))=%substr(&upword,2,1) %then %do;
            %let namelist=&namelist %sysfunc(varname(&dsid, &j));
            %let count=%eval(&count + 1);
          %end;
        %end;
      %end;

      /* Parse a word like x-numeric-b or x-character-b:
      1. Use function %index to return the positions (p & q) of keywords -NUMERIC-
         and -CHARACTER-(the original word was converted to upper case) in upword.
      2. Extract the names (name1 & name2) of the beginning and ending variables
         in the word.
      3. Get the position numbers (p1 & p2) for name1 and name2.
      4. Check for error conditions.
      5. Using the position number to loop through variables between name1 and name2
         and check their variable types. Select those variables whose variable type
         matches the keyword (-NUMERIC- or -CHARACTER-).
      */
      %else %if %index(&upword, -NUMERIC-) | %index(&upword, -CHARACTER-) %then %do;
        %let p=%index(&upword, -NUMERIC-);
        %let q=%index(&upword, -CHARACTER-);
        %if &p %then %do;
          %let name1=%substr(&upword, 1, &p-1);
          %let name2=%substr(&upword, &p+9);
          %let type=N;
        %end;
        %else %do;
          %let name1=%substr(&upword, 1, &q-1);
          %let name2=%substr(&upword, &q+11);
          %let type=C;
        %end;
        %let p1=%sysfunc(varnum(&dsid,&name1));
        %let p2=%sysfunc(varnum(&dsid,&name2));
        %if &p1=0 | &p2=0 | (&p1 > &P2) %then %do;
          %put ERROR: Invalid variable list &word;
          %goto Fail;
        %end;
        %do j=&p1 %to &p2;
          %if %sysfunc(vartype(&dsid, &j))=&type %then %do;
            %let namelist=&namelist %sysfunc(varname(&dsid, &j));
            %let count=%eval(&count + 1);
          %end;
        %end;
      %end;

      /* Parse a word like a1-a20 or b003-b152 (numbered range variables):
      There are two variations of this variable list - one is that the suffixes can have different
      lengths and in the other all name suffixes must have the same length by padding leading 0s
      to smaller suffixes such as b003-b010. Both are valid SAS variable lists to be used in data steps.
      1. Get the position of the dash "-" in the word: p=%index(&word, -).
      2. Extract the names (name1 & name2) of the beginning and ending variables
         in the word.
      3. k is the position of the suffix for name1.
      4. Use this position and name1 to extract the prefix of the name list.
      5. Extract the suffixes (suffix1 & suffix2) for name1 and name2.
      6. Check for error conditions (invalid name1 or name2, or suffix1>suffix2).
      7. All variable names prefixj where suffix1<= j <=suffix2 are supposed to be in the list.
      8. If j has fewer digits than suffix1, pad j with leading 0s.
      */
      %else %if %index(&word, -) %then %do;
        %let p=%index(&word, -);
        %let name1=%substr(&word, 1, &p-1);
        %let name2=%substr(&word, &p+1);
        %let k=%eval(%sysfunc(notdigit(&name1, -%length(&name1))) + 1);
        %let prefix=%substr(&name1,1, &k-1);
        %let suffix1=%substr(&name1,&k);
        %let suffix2=%substr(&name2,&k);
        %if %sysfunc(varnum(&dsid,&name1))=0 | %sysfunc(varnum(&dsid,&name2))=0 | (&suffix1>&suffix2) %then %do;
          %put ERROR: Invalid variable list &word;
          %goto Fail;
        %end;
        %let len=%length(&suffix1);
        %do j=&suffix1 %to &suffix2;
          %let d=%eval(&len-%length(&j));
          %if &d <=0 & %sysfunc(varnum(&dsid,&prefix&j)) %then %do;
            %let namelist=&namelist &prefix&j;
            %let count=%eval(&count + 1);
          %end;
          /* if &d>0 pad j with d leading 0s and save as jj */
          %else %do;
            %let jj=&j;
            %do k=1 %to &d;
              %let jj=0&jj;
            %end;
            %if %sysfunc(varnum(&dsid,&prefix&jj)) %then %do;
              %let namelist=&namelist &prefix&jj;
              %let count=%eval(&count + 1);
            %end;
          %end;
        %end;
      %end;

      /* Parse a word like cd - just add it to the namelist */
      %else %if %sysfunc(varnum(&dsid,&word)) %then %do;
        %let count = %eval(&count + 1);
        %let namelist=&namelist &word;
      %end;

      /* An unrecognized word */
      %else %do;
        %put ERROR: Invalid variable name or list &word;
        %goto Fail;
      %end;

      %let i=%eval(&i+1);
    %end;
  %end;

  /* Parse a Perl regular expression:
  1. Pass Perl regular expression &varlist to function prxparse. Prxparse parses the expression
     and returns a pattern id (pid). If an error occurs, it returns a missing value "." and issues
     an error message.
  2. If the expression is parsed successfully (pid>0), set up a loop to match the pattern against
     each variable name in data set dsn using prxmatch function: %sysfunc(prxmatch(&pid, &name)).
     If a match is found, prxmatch returns the position at which the pattern (pid) is first found
     in the name string. If there is no match, prxmatch returns a 0.
  3. Once pid matches a name, add the name to the namelist and increase the counter.
  4. %syscall prxfree(pid) frees the resources that were allocated to the Perl regular expression (pid).
  */
  %else %do;
    %let pid =%sysfunc(prxparse(&varlist));
    %if &pid=. %then %goto Fail;
    %else %if &pid %then %do j=1 %to &_n_;
      %let name=%sysfunc(varname(&dsid, &j));
      %if %sysfunc(prxmatch(&pid, &name)) %then %do;
        %let namelist=&namelist &name;
        %let count=%eval(&count + 1);
      %end;
    %end;
    %syscall prxfree(pid);
  %end;

  /* Normal completion: release the handle, publish the count, return the list. */
  %let rc= %sysfunc(close(&dsid));
  %if &nvars ^= %then %do;
    %global &nvars;
    %let &nvars = &count;
  %end;
  &namelist
  %goto Exit;

%Fail:
  /* Error after the data set was opened: close it and return nothing. */
  %let rc= %sysfunc(close(&dsid));
%Exit:
%Mend Parse;
[/pre]
Thanks for any help you can provide!!
LP