BookmarkSubscribeRSS Feed

[SAS 프로그래밍 고수 백승민] [그룹처리] 그룹별 상위(하위) n개의 데이터 추출하는 방법

Started ‎06-11-2020 by
Modified ‎06-11-2020 by
Views 103

* 수정사항이나 추가 프로그램이 있으시면 아래 주소에 부탁드립니다;

* http://cafe.daum.net/statsas/3F8j/223;

 DATA BACK;
INPUT NAME $ PAY;
CARDS;
백승민 100
백승민 300
백승민 500
백승민 200
홍길동 100
홍길동 500
홍길동 300
홍길동 400
홍길동 700
;

PROC SORT DATA=BACK OUT=BACK_SORT;
     BY NAME PAY;
RUN;


* 1. RANK 프로시져 사용;
PROC RANK DATA=BACK_SORT OUT=BACK1(WHERE=(RANK<=2));
     BY NAME;
     RANKS RANK;
     VAR PAY;
RUN;

* 2. FIRST. 사용;
DATA BACK2;
 SET BACK_SORT;
     BY NAME PAY;
     IF FIRST.NAME THEN RANK=1;
     ELSE RANK+1;
     IF RANK <=2;
RUN;


* 3. 그룹내 그룹간 순위 메기기(Between and Within Group Counters);
* http://www.sascommunity.org/wiki/Tips:Between_and_Within_Group_Counters;
DATA BACK3;
 SET BACK_SORT;
     BY NAME PAY;
     WITHIN + (-FIRST.NAME * WITHIN)+ 1;
     IF WITHIN <=2;
RUN;

 

* 4. PROC SQL로 구현(사전 PROC SORT);
proc sql;
  create table back4 as
    select a.*,
           MONOTONIC()      as var1,
           MIN(MONOTONIC()) as var2,
           CALCULATED VAR1 - CALCULATED VAR2 + 1 AS RANK
    from   BACK_SORT a
    GROUP BY NAME;
quit;


* 5. PROC SQL로 구현(사전에 PROC SORT 사용 안함);
* SAS는 인라인뷰에서 ORDER BY 지원안함;

* 내부적으로 정렬 후에 MONOTONIC을 적용하기 위하여 INLINE 뷰 내부에 HAVING절을 삽입하고,
  MONOTONIC에 RANK변수를 지정;

* -> 내부 처리 방식 문제라 정확한 해답이라고 확답을 못하겠네요;
* -> Key값에 중복 발생시에는 문제;

* _TREE로 확인 필요;
PROC SQL;
  CREATE TABLE BACK5(WHERE=(RANK<=2)) AS
  SELECT A.*,
         MONOTONIC(SORTING)      AS VAR3,
         MIN(MONOTONIC(SORTING)) AS VAR4,
         CALCULATED VAR3 - CALCULATED VAR4 + 1 AS RANK
  FROM ( SELECT A.*,
                A.NAME||PUT(A.PAY,3.) AS SORTING
         FROM   BACK A
         GROUP BY 1,2
         HAVING MIN(SORTING) = SORTING
       ) A
  GROUP BY NAME
  ;
QUIT;

 

* 6. 자체 결합을 통하여 기준값(행) 보다 작은 행을 카운트;
PROC SQL;
  CREATE TABLE BACK6(WHERE=(RANK<=2)) AS
    SELECT A.NAME,
           A.PAY,
           COUNT(B.NAME)+1 AS RANK
    FROM   BACK A
           LEFT JOIN BACK B
                ON  A.NAME = B.NAME
                AND A.PAY  > B.PAY
    GROUP BY
           A.NAME,
           A.PAY;
QUIT;

Version history
Last update:
‎06-11-2020 10:09 PM
Updated by:
Contributors

sas-innovate-wordmark-2025-midnight.png

Register Today!

Join us for SAS Innovate 2025, our biggest and most exciting global event of the year, in Orlando, FL, from May 6-9. Sign up by March 14 for just $795.


Register now!

Article Labels
Article Tags