Hi All,
I have a dataset below with multiple columns and unique ID.
ID | A|B|C
123| 1|1|0
231| 1|0|1
456| 0|1|0
759| 1|1|0
I woul like to have output like this where the diagonal is counting customers who only have that product.
v1|A|B|C
A |0|2|1
B |2|1|0
C |1|0|0So far what I did is I append the product holding as below:
Table Final:
ID |PRODUCT|
123|      A|
123|      B|
231|      A|
231|      C|
456|      B|
759|      A|
759|      B|
data temp;
array x{99999} $ 40 _temporary_;
do i=1 by 1 until(last.id);
  set final;
  by id;
 x{i}=product;
end;
do m=1 to i;
 do n=m to i;
   v1=x{m};
   v2=x{n};
   output;
   if n ne m then do;
     v2=x{m};
     v1=x{n};
     output;
   end;
 end;
end;
keep id v1 v2;
run;
proc freq data=temp noprint;
 table v1*v2/out=temp1 list nopercent nocum;
run;
proc transpose data=temp1 out=want(drop=_:);
by v1;
id v2;
var count;
run;
And the output is having duplicated count like this:
v1|A|B|C
A |3|2|1
B |2|3|0
C |1|0|1I dont want this output but dont know how to change. Please help
Thanks
data have;
infile cards dlm='|';
input ID  A B C;
cards;
123| 1|1|0
231| 1|0|1
456| 0|1|0
759| 1|1|0
;
data temp;
 set have;
 array x{*} a b c;
 if sum(of x{*})=1 then do;
  idx=whichn(1,of x{*});
  name1=upcase(vname(x{idx}));
  name2=upcase(vname(x{idx}));
  output;
 end;
 else do;
   do i=1 to dim(x)-1;
     do j=i+1 to dim(x);
       if x{i}=1 and x{j}=1 then do;
         name1=upcase(vname(x{i}));
		 name2=upcase(vname(x{j}));
		 output;
		 end;
	   end;
	 end;
 end;
keep name1 name2;
run;
proc freq data=temp noprint;
table name1*name2/out=temp1;
run;
data key;
input name $upcase.;
cards;
a
b
c
;
proc sql;
create table temp2 as
select c.name1,c.name2,coalesce(d.count,0) as count
 from 
(select a.name as name1,b.name as name2 from 
   (select * from key) as a,(select * from key) as b) as c
left join temp1 as d
on c.name1=d.name1 and c.name2=d.name2 or c.name1=d.name2 and c.name2=d.name1
order by 1,2;
quit;
proc transpose data=temp2 out=want(drop=_name_);
by name1;
id name2;
var count;
run;Works as what I want! Thanks.
What if I have number of products and I want to sum the products holding? How to add in the script?
ID | A|B|C
123| 2|1|0
231| 1|0|2
456| 0|2|0
759| 1|1|0Result as below:
v1|A|B|C
A |0|5|3
B |5|2|0
C |3|0|0Thanks again!
OK. Try this one .
data have;
infile cards dlm='|';
input ID  A B C;
cards;
123| 2|1|0
231| 1|0|2
456| 0|2|0
759| 1|1|0
;
data temp;
 set have;
 array x{*} a b c;
 zero=0;only=0;
 do i=1 to dim(x);
  if x{i}=0 then zero+1;
   else only=x{i};
 end;
 if zero=dim(x)-1 then do;
  idx=whichn(only,of x{*});
  name1=upcase(vname(x{idx}));
  name2=upcase(vname(x{idx}));
  weight=only;
  output;
 end;
 else do;
   do i=1 to dim(x)-1;
     do j=i+1 to dim(x);
       if x{i} ne 0 and x{j} ne 0 then do;
         name1=upcase(vname(x{i}));
		 name2=upcase(vname(x{j}));
		 weight=x{i}+x{j};
		 output;
		 end;
	   end;
	 end;
 end;
keep name1 name2 weight;
run;
proc freq data=temp noprint;
table name1*name2/out=temp1;
weight weight;
run;
data key;
input name $upcase.;
cards;
a
b
c
;
proc sql;
create table temp2 as
select c.name1,c.name2,coalesce(d.count,0) as count
 from 
(select a.name as name1,b.name as name2 from 
   (select * from key) as a,(select * from key) as b) as c
left join temp1 as d
on c.name1=d.name1 and c.name2=d.name2 or c.name1=d.name2 and c.name2=d.name1
order by 1,2;
quit;
proc transpose data=temp2 out=want(drop=_name_);
by name1;
id name2;
var count;
run;Hi @dearkyr
Another way to proceed:
data have;
	input ID $ A B C;
	cards;
123 1 1 0
231 1 0 1
456 0 1 0
759 1 1 0
;
run;
/* Retrieve the list of variables (except ID): A B C ... in macrovariable &list_letters */
proc contents data=have out=have_contents noprint;
run;
proc sql noprint;
	select distinct name into: list_letters separated by " " from have_contents where name ne "ID";
quit;
/* Identify the frequency of each couple (e.g. A B, ...) */
data have_tr;
	set have;
	
	array _letter(*) &list_letters;
	total = sum(of _letter(*));
	do i=1 to dim(_letter);
		do j=i to dim(_letter);
			if _letter(i)=1 and _letter(j)=1 then
				do;
					couple_1=vname(_letter(i));
					couple_2=vname(_letter(j));
					if total = 1 or (total = 2 and couple_1 ne couple_2) then do;
						couple=compress(catx("", couple_1, couple_2));
						output;
					end;
				end;
		end;
	end;
	keep ID couple;
run;
proc freq data=have_tr;
	table couple / noprint out=have_freq (keep=couple count);
run;
/* Retrieve the list of distinct couples (AB, ...) in macrovariable &list_couple */
proc sql noprint;
	select distinct couple into: list_couple separated by " " from have_tr;
quit;
/* Create the matrix table */
proc transpose data=have_freq out=have_tr2 (drop=_name_ _label_);
	var count;
	ID couple;
run;
proc transpose data=have out=structure (keep=_name_ rename=(_name_=V1));
	var _numeric_;
run;
data want;
	set structure;
	if _n_ = 1 then set have_tr2;
	array _matrix(*) &list_letters;
	array _ref(*) &list_couple;
	do i=1 to dim(_matrix);
		do j=1 to dim(_ref);
			if (substr(vname(_ref(j)),2,1) = V1 
			   and substr(vname(_ref(j)),1,1) = vname(_matrix(i)))
			   or
			   (substr(vname(_ref(j)),1,1) = V1 
			   and substr(vname(_ref(j)),2,1) = vname(_matrix(i)))
			   then _matrix(i) = _ref(j);
			if _matrix(i) = . then _matrix(i) = 0;
		end;
	end;
	keep V1 &list_letters;
run;Hi @dearkyr
I have modified added a data step and also specify a WEIGHT statement in the proc freq to take into account the new request:
data have;
	input ID $ A B C;
	cards;
123 2 1 0
231 1 0 2
456 0 2 0
759 1 1 0
;
run;
/* Retrieve the list of variables (except ID): A B C ... in macrovariable &list_letters */
proc contents data=have out=have_contents noprint;
run;
proc sql noprint;
	select distinct name into: list_letters separated by " " from have_contents where name ne "ID";
quit;
/* Identify the frequency of each couple (e.g. A B, ...) */
data have_tr;
	set have;
	
	array _letter(*) &list_letters;
	total = sum(of _letter(*));
	do i=1 to dim(_letter);
		do j=i to dim(_letter);
			if _letter(i)>0 and _letter(j)>0 then
				do;
					couple_1=vname(_letter(i));
					couple_2=vname(_letter(j));
					if total > 0 then do;
						couple=compress(catx("", couple_1, couple_2));
						output;
					end;
				end;
		end;
	end;
	keep ID couple total;
run;
data have_tr1;
	set have_tr;
	by ID;
	if (first.ID and last.ID)
		or (substr(couple,1,1) ne substr(couple,2,1))
		then output;
run;
proc freq data=have_tr1;
	table couple / noprint out=have_freq (keep=couple count);
	weight total;
run;
/* Retrieve the list of distinct couples (AB, ...) in macrovariable &list_couple */
proc sql noprint;
	select distinct couple into: list_couple separated by " " from have_freq;
quit;
/* Create the matrix table */
proc transpose data=have_freq out=have_tr2 (drop=_name_ _label_);
	var count;
	ID couple;
run;
proc transpose data=have out=structure (keep=_name_ rename=(_name_=V1));
	var _numeric_;
run;
data want;
	set structure;
	if _n_ = 1 then set have_tr2;
	array _matrix(*) &list_letters;
	array _ref(*) &list_couple;
	do i=1 to dim(_matrix);
		do j=1 to dim(_ref);
			if (substr(vname(_ref(j)),2,1) = V1 
			   and substr(vname(_ref(j)),1,1) = vname(_matrix(i)))
			   or
			   (substr(vname(_ref(j)),1,1) = V1 
			   and substr(vname(_ref(j)),2,1) = vname(_matrix(i)))
			   then _matrix(i) = _ref(j);
			if _matrix(i) = . then _matrix(i) = 0;
		end;
	end;
	keep V1 &list_letters;
run;Output:
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.
