Query in proc sql to obtain count where price is higher than average p...

msf2021 · Posted 06-24-2022 07:48 AM

Hello everyone,

I have the following tables:

/* Tabela Artigos */
proc sql;
   create table artigos
       (REFERENCIA char(1),
        ID_DPTO num,
		COD_FAMILIA char(3),
        DATA_ALTA num informat=DDMMYY10.
                  format=DDMMYY10.
        );

insert into artigos
    values('A',1,'1F1','21FEB10'd)
    values('B',2,'2F1','07FEB14'd)
    values('C',2,'2F1','15MAY14'd)
	values('D',2,'2F1','01OCT05'd)
	values('E',2,'2F1','31OCT10'd)
	values('F',3,'3F1','20APR01'd)
	values('G',3,'3F2','14FEB15'd)
	values('H',4,'4F1','25MAR09'd);
quit;


/* Tabela Campanhas */
proc sql;
   create table campanhas
       (ID_CAMPANHA char(3),
        TIPO char(20),
		DATA_CAMP num informat=DDMMYY10.
                  format=DDMMYY10.
        );

insert into campanhas
    values('CA1','VENDA','05FEB12'd)
    values('CA2','VENDA','04FEB12'd)
    values('CA3','PÓS-VENDA','02OCT09'd)
	values('CA4','VENDA','06MAY10'd)
	values('CA5','PÓS-VENDA','30NOV10'd)
	values('CA6','PÓS-VENDA','07FEB15'd);
quit;


/* Tabela Departamento Campanha */
proc sql;
   create table depto_campanha
       (ID_DPTO num,
        ID_CAMPANHA char(3) 
        );

insert into depto_campanha
    values(1,'CA1')
    values(1,'CA4')
    values(2,'CA2')
	values(3,'CA2')
	values(3,'CA5')
	values(3,'CA6')
	values(4,'CA1')
	values(4,'CA2')
	values(4,'CA3')
	values(4,'CA6');
quit;



/* Tabela Vendas */
proc sql;
   create table vendas
       (TALAO num informat=z2.
                  format=z2.,
        REFERENCIA char(1),
		PRECO num,
        DATA_VENDA num informat=DDMMYY10.
                  format=DDMMYY10.
        );

insert into vendas
    values(01,'A',15,'11FEB12'd)
	values(02,'A',18,'15FEB12'd)
	values(03,'A',14,'02OCT13'd)
	values(04,'D',37.95,'06AUG12'd)
	values(05,'E',125.95,'30NOV10'd)
	values(06,'E',150,'05FEB11'd)
	values(07,'H',22.99,'11APR10'd)
	values(08,'H',24,'08AUG11'd);
quit;

I need to create a query (only one and in sql) in which, for each campaign of the "VENDA" type (column "TIPO"), i obtain the average value of beads (column "TALAO"), the number of purchased references (column "REFERENCIAS") and the number of beads ("TALAO") with sales (column "PRECO") higher than the average value of the receipt.

I already could accomplish part of it but in my query i don't obtai the last part "number of beads with sales higher than the average value of the receipt". Can anyone help me here?

My solution so far:

proc sql;
select 
t1.id_campanha
,t1.tipo
,avg(t4.preco) as avg_price_beads 
,count(distinct t3.referencia) as nr_referencies
,count(distinct t4.talao) as nr_beads 
from campanhas t1
left join depto_campanha t2 on t1.id_campanha=t2.id_campanha
left join artigos t3 on t2.id_dpto=t3.id_dpto
inner join vendas t4 on t4.referencia=t3.referencia 
where t1.tipo="VENDA"
group by 1,2
having  t4.preco > avg_price_beads ;
quit;

I also tried:

proc sql;
select 
t1.id_campanha
,t1.tipo
,avg(t4.preco) as avg_price_beads 
,count(distinct t3.referencia) as nr_referencies
,count(distinct case when  t4.preco > calculated avg_price_beads then t4.talao end ) as nr_beads 
from campanhas t1
left join depto_campanha t2 on t1.id_campanha=t2.id_campanha
left join artigos t3 on t2.id_dpto=t3.id_dpto
inner join vendas t4 on t4.referencia=t3.referencia 
where t1.tipo="VENDA"
group by 1,2
 ;
quit;

PaigeMiller · Posted 06-24-2022 08:37 AM

I'm sure you can do this in PROC SQL, but I won't give it a try. I would use PROC STDIZE to determine which observations are greater than the average, and then count those using PROC FREQ.

UNTESTED CODE

proc stdize data=have out=intermediate;
    var x1; /* Your variable name goes here */
run;
proc format;
    value gt_avg low-0='Below Average' 0<-high='Above Average';
run;
proc freq data=intermediate;
    tables x1/out=want; /* Your variable name goes here */
    format x1 gt_avg.; /* Your variable name goes here */
run;

--
Paige Miller

tarheel13 · Posted 06-24-2022 10:03 AM

Which table is receipts?

msf2021 · Posted 06-26-2022 09:33 AM

table "Vendas"

tarheel13 · Posted 06-26-2022 12:25 PM

ok the requirements are a little unclear to me. I don't have a good enough understanding of your data.

msf2021 · Posted 06-26-2022 12:52 PM

What about now:

/* Table Articles */
proc sql;
   create table articles
       (REFERENCY char(1),
        ID_DPTO num,
        COD_FAMILIA char(3),
        DATA_ALTA num informat=DDMMYY10.
                  format=DDMMYY10.
        );

insert into articles
    values('A',1,'1F1','21FEB10'd)
    values('B',2,'2F1','07FEB14'd)
    values('C',2,'2F1','15MAY14'd)
    values('D',2,'2F1','01OCT05'd)
    values('E',2,'2F1','31OCT10'd)
    values('F',3,'3F1','20APR01'd)
    values('G',3,'3F2','14FEB15'd)
    values('H',4,'4F1','25MAR09'd);
quit;


/* Table CAMPAIGNS */
proc sql;
   create table CAMPAIGNS
       (ID_CAMPAIGN char(3),
        TYPE char(20),
        DATA_CAMP num informat=DDMMYY10.
                  format=DDMMYY10.
        );

insert into CAMPAIGNS
    values('CA1','VENDA','05FEB12'd)
    values('CA2','VENDA','04FEB12'd)
    values('CA3','PÓS-VENDA','02OCT09'd)
    values('CA4','VENDA','06MAY10'd)
    values('CA5','PÓS-VENDA','30NOV10'd)
    values('CA6','PÓS-VENDA','07FEB15'd);
quit;


/* Table DEPARTMENT CAMPAIGN */
proc sql;
   create table depto_CAMPAIGN
       (ID_DPTO num,
        ID_CAMPAIGN char(3) 
        );

insert into depto_CAMPAIGN
    values(1,'CA1')
    values(1,'CA4')
    values(2,'CA2')
    values(3,'CA2')
    values(3,'CA5')
    values(3,'CA6')
    values(4,'CA1')
    values(4,'CA2')
    values(4,'CA3')
    values(4,'CA6');
quit;



/* Table SALES */
proc sql;
   create table SALES
       (BEAD num informat=z2.
                  format=z2.,
        REFERENCY char(1),
        PRICE num,
        DATA_VENDA num informat=DDMMYY10.
                  format=DDMMYY10.
        );

insert into SALES
    values(01,'A',15,'11FEB12'd)
    values(02,'A',18,'15FEB12'd)
    values(03,'A',14,'02OCT13'd)
    values(04,'D',37.95,'06AUG12'd)
    values(05,'E',125.95,'30NOV10'd)
    values(06,'E',150,'05FEB11'd)
    values(07,'H',22.99,'11APR10'd)
    values(08,'H',24,'08AUG11'd);
quit;

I need to create a query (only one and in sql) in which, for each campaign of the "VENDA" type (column "TYPE"), i obtain the average value of beads (column "BEAD"), the number of purchased references (column "REFERENCYS") and the number of beads ("BEAD") with sales (column "PRICE") higher than the average price.

tarheel13 · Posted 06-27-2022 03:23 PM

hmm sort of. how are you getting average price. it seems like this could be done with a subquery.

Query in proc sql to obtain count where price is higher than average price

Re: Query in proc sql to obtain count where price is higher than average price

Re: Query in proc sql to obtain count where price is higher than average price

Re: Query in proc sql to obtain count where price is higher than average price

Re: Query in proc sql to obtain count where price is higher than average price

Re: Query in proc sql to obtain count where price is higher than average price

Re: Query in proc sql to obtain count where price is higher than average price

Classroom Training Available!