Hi All,
I have a dataset below with multiple columns and unique ID.
ID | A|B|C
123| 1|1|0
231| 1|0|1
456| 0|1|0
759| 1|1|0
I woul like to have output like this where the diagonal is counting customers who only have that product.
v1|A|B|C
A |0|2|1
B |2|1|0
C |1|0|0
So far what I did is I append the product holding as below:
Table Final:
ID |PRODUCT|
123| A|
123| B|
231| A|
231| C|
456| B|
759| A|
759| B|
data temp;
array x{99999} $ 40 _temporary_;
do i=1 by 1 until(last.id);
set final;
by id;
x{i}=product;
end;
do m=1 to i;
do n=m to i;
v1=x{m};
v2=x{n};
output;
if n ne m then do;
v2=x{m};
v1=x{n};
output;
end;
end;
end;
keep id v1 v2;
run;
proc freq data=temp noprint;
table v1*v2/out=temp1 list nopercent nocum;
run;
proc transpose data=temp1 out=want(drop=_:);
by v1;
id v2;
var count;
run;
And the output is having duplicated count like this:
v1|A|B|C
A |3|2|1
B |2|3|0
C |1|0|1
I dont want this output but dont know how to change. Please help
Thanks
data have;
infile cards dlm='|';
input ID A B C;
cards;
123| 1|1|0
231| 1|0|1
456| 0|1|0
759| 1|1|0
;
data temp;
set have;
array x{*} a b c;
if sum(of x{*})=1 then do;
idx=whichn(1,of x{*});
name1=upcase(vname(x{idx}));
name2=upcase(vname(x{idx}));
output;
end;
else do;
do i=1 to dim(x)-1;
do j=i+1 to dim(x);
if x{i}=1 and x{j}=1 then do;
name1=upcase(vname(x{i}));
name2=upcase(vname(x{j}));
output;
end;
end;
end;
end;
keep name1 name2;
run;
proc freq data=temp noprint;
table name1*name2/out=temp1;
run;
data key;
input name $upcase.;
cards;
a
b
c
;
proc sql;
create table temp2 as
select c.name1,c.name2,coalesce(d.count,0) as count
from
(select a.name as name1,b.name as name2 from
(select * from key) as a,(select * from key) as b) as c
left join temp1 as d
on c.name1=d.name1 and c.name2=d.name2 or c.name1=d.name2 and c.name2=d.name1
order by 1,2;
quit;
proc transpose data=temp2 out=want(drop=_name_);
by name1;
id name2;
var count;
run;
Works as what I want! Thanks.
What if I have number of products and I want to sum the products holding? How to add in the script?
ID | A|B|C
123| 2|1|0
231| 1|0|2
456| 0|2|0
759| 1|1|0
Result as below:
v1|A|B|C
A |0|5|3
B |5|2|0
C |3|0|0
Thanks again!
OK. Try this one .
data have;
infile cards dlm='|';
input ID A B C;
cards;
123| 2|1|0
231| 1|0|2
456| 0|2|0
759| 1|1|0
;
data temp;
set have;
array x{*} a b c;
zero=0;only=0;
do i=1 to dim(x);
if x{i}=0 then zero+1;
else only=x{i};
end;
if zero=dim(x)-1 then do;
idx=whichn(only,of x{*});
name1=upcase(vname(x{idx}));
name2=upcase(vname(x{idx}));
weight=only;
output;
end;
else do;
do i=1 to dim(x)-1;
do j=i+1 to dim(x);
if x{i} ne 0 and x{j} ne 0 then do;
name1=upcase(vname(x{i}));
name2=upcase(vname(x{j}));
weight=x{i}+x{j};
output;
end;
end;
end;
end;
keep name1 name2 weight;
run;
proc freq data=temp noprint;
table name1*name2/out=temp1;
weight weight;
run;
data key;
input name $upcase.;
cards;
a
b
c
;
proc sql;
create table temp2 as
select c.name1,c.name2,coalesce(d.count,0) as count
from
(select a.name as name1,b.name as name2 from
(select * from key) as a,(select * from key) as b) as c
left join temp1 as d
on c.name1=d.name1 and c.name2=d.name2 or c.name1=d.name2 and c.name2=d.name1
order by 1,2;
quit;
proc transpose data=temp2 out=want(drop=_name_);
by name1;
id name2;
var count;
run;
Hi @dearkyr
Another way to proceed:
data have;
input ID $ A B C;
cards;
123 1 1 0
231 1 0 1
456 0 1 0
759 1 1 0
;
run;
/* Retrieve the list of variables (except ID): A B C ... in macrovariable &list_letters */
proc contents data=have out=have_contents noprint;
run;
proc sql noprint;
select distinct name into: list_letters separated by " " from have_contents where name ne "ID";
quit;
/* Identify the frequency of each couple (e.g. A B, ...) */
data have_tr;
set have;
array _letter(*) &list_letters;
total = sum(of _letter(*));
do i=1 to dim(_letter);
do j=i to dim(_letter);
if _letter(i)=1 and _letter(j)=1 then
do;
couple_1=vname(_letter(i));
couple_2=vname(_letter(j));
if total = 1 or (total = 2 and couple_1 ne couple_2) then do;
couple=compress(catx("", couple_1, couple_2));
output;
end;
end;
end;
end;
keep ID couple;
run;
proc freq data=have_tr;
table couple / noprint out=have_freq (keep=couple count);
run;
/* Retrieve the list of distinct couples (AB, ...) in macrovariable &list_couple */
proc sql noprint;
select distinct couple into: list_couple separated by " " from have_tr;
quit;
/* Create the matrix table */
proc transpose data=have_freq out=have_tr2 (drop=_name_ _label_);
var count;
ID couple;
run;
proc transpose data=have out=structure (keep=_name_ rename=(_name_=V1));
var _numeric_;
run;
data want;
set structure;
if _n_ = 1 then set have_tr2;
array _matrix(*) &list_letters;
array _ref(*) &list_couple;
do i=1 to dim(_matrix);
do j=1 to dim(_ref);
if (substr(vname(_ref(j)),2,1) = V1
and substr(vname(_ref(j)),1,1) = vname(_matrix(i)))
or
(substr(vname(_ref(j)),1,1) = V1
and substr(vname(_ref(j)),2,1) = vname(_matrix(i)))
then _matrix(i) = _ref(j);
if _matrix(i) = . then _matrix(i) = 0;
end;
end;
keep V1 &list_letters;
run;
Hi @dearkyr
I have modified added a data step and also specify a WEIGHT statement in the proc freq to take into account the new request:
data have;
input ID $ A B C;
cards;
123 2 1 0
231 1 0 2
456 0 2 0
759 1 1 0
;
run;
/* Retrieve the list of variables (except ID): A B C ... in macrovariable &list_letters */
proc contents data=have out=have_contents noprint;
run;
proc sql noprint;
select distinct name into: list_letters separated by " " from have_contents where name ne "ID";
quit;
/* Identify the frequency of each couple (e.g. A B, ...) */
data have_tr;
set have;
array _letter(*) &list_letters;
total = sum(of _letter(*));
do i=1 to dim(_letter);
do j=i to dim(_letter);
if _letter(i)>0 and _letter(j)>0 then
do;
couple_1=vname(_letter(i));
couple_2=vname(_letter(j));
if total > 0 then do;
couple=compress(catx("", couple_1, couple_2));
output;
end;
end;
end;
end;
keep ID couple total;
run;
data have_tr1;
set have_tr;
by ID;
if (first.ID and last.ID)
or (substr(couple,1,1) ne substr(couple,2,1))
then output;
run;
proc freq data=have_tr1;
table couple / noprint out=have_freq (keep=couple count);
weight total;
run;
/* Retrieve the list of distinct couples (AB, ...) in macrovariable &list_couple */
proc sql noprint;
select distinct couple into: list_couple separated by " " from have_freq;
quit;
/* Create the matrix table */
proc transpose data=have_freq out=have_tr2 (drop=_name_ _label_);
var count;
ID couple;
run;
proc transpose data=have out=structure (keep=_name_ rename=(_name_=V1));
var _numeric_;
run;
data want;
set structure;
if _n_ = 1 then set have_tr2;
array _matrix(*) &list_letters;
array _ref(*) &list_couple;
do i=1 to dim(_matrix);
do j=1 to dim(_ref);
if (substr(vname(_ref(j)),2,1) = V1
and substr(vname(_ref(j)),1,1) = vname(_matrix(i)))
or
(substr(vname(_ref(j)),1,1) = V1
and substr(vname(_ref(j)),2,1) = vname(_matrix(i)))
then _matrix(i) = _ref(j);
if _matrix(i) = . then _matrix(i) = 0;
end;
end;
keep V1 &list_letters;
run;
Output:
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.