Desktop productivity for business analysts and programmers

Classfication in SAS EG

Reply
Occasional Contributor
Posts: 9

Classfication in SAS EG

Hi Community, 

 

I have a set of transaction data with the following columns: Good_description, Customer_ID

 

As usual in the classification I defined my classes before. So that I have (in this example) 2 different classes. Class 1 is for textile goods und Class 2 is for fruits

 

Good_description: cotton , Customer_ID = 01 

Good_description: banana , Customer_ID = 01 

 

I want to build a classification, where I can identify customers, who buy goods from two different classes. 

 

1. question: Does it makes sense to build a decision tree here, if yes how can I produce one? 

2. question Which possibilities do I have in SAS EG to realize? 

 

Thanks

Mairam

PROC Star
Posts: 631

Re: Classfication in SAS EG

Posted in reply to Mairam2345

Not sure what you want to achieve. If you want a report on each customer on number of class items then here is one type of approach.

 

DATA HAVE;
INPUT CUSTOMER_ID :$ GOOD_DESCRIPTION :$;
DATALINES;
01 Cotton
01 Banana
02 Silk
03 Yarn
03 Apple
03 Cotton
03 Banana
04 Banana
;
RUN;

DATA Class_ID;
set have ;
IF GOOD_DESCRIPTION in ("Cotton","Silk","Yarn") THEN CLASS=1;
ELSE class=2;
run;
PROC SORT DATA=Class_ID;
by customer_id class;
run;
data Class_ID_Count(drop=good_description);
Set Class_ID;
by customer_id class;
if first.class then count=1;
else count+1;
if last.class;
run;

proc transpose data=Class_ID_Count out=want(Drop=_name_) prefix=Class_;
by customer_id;
id class;
var count;
run;
data want;
set want;
array num _numeric_;
do over num;
if num=. then num=0;
end;
run;
proc print data=want;
run;
Thanks,
Suryakiran
Esteemed Advisor
Posts: 5,625

Re: Classfication in SAS EG

Posted in reply to Mairam2345

Following @SuryaKiran's appoach, and an alternative

 

DATA TRANSACT;
INPUT CUSTOMER_ID $ GOOD_DESCRIPTION $;
DATALINES;
01 Cotton
01 Banana
02 Silk
03 Yarn
03 Apple
03 Cotton
03 Banana
04 Banana
04 Barley
;

data CLASS;
input class GOOD_DESCRIPTION $;
datalines;
1 Cotton
1 Silk
1 Yarn
0 Apple
0 Banana
0 Pear
;

proc sql;
create table TRANSACT_CLASS as
select 
    CUSTOMER_ID,
    coalesce( class, 999) as class,
    count(*) as nbTransact
from 
    TRANSACT as T left join
    CLASS as C on T.GOOD_DESCRIPTION=C.GOOD_DESCRIPTION
group by CUSTOMER_ID, class;
quit;

proc transpose data=TRANSACT_CLASS 
    out=CUSTOMER_CLASS(drop=_name_) prefix=class_;
by CUSTOMER_ID;
var nbTransact;
id class;
run;

proc print data=CUSTOMER_CLASS noobs; run;

/* Or With a format and proc freq, get the proportions (Probably faster) */

proc format;
value $good_class
"Cotton" = "1"
"Silk"   = "1"
"Yarn"   = "1"
"Apple"  = "0"
"Banana" = "0"
"Pear"   = "0"
other    = "999";
run;

proc freq data=TRANSACT noprint;
format GOOD_DESCRIPTION $good_class.;
table customer_id*good_description / out=class_pct outpct;
run;

proc transpose data=class_pct 
    out=CUSTOMER_CLASS_PCT(drop=_name_ _label_) prefix=class_;
by CUSTOMER_ID;
var pct_row;
format pct_row 4.1;
id good_description;
run;

proc print data=CUSTOMER_CLASS_PCT noobs; run;
PG
Ask a Question
Discussion stats
  • 2 replies
  • 193 views
  • 0 likes
  • 3 in conversation