data US_Cities;
input Timezone $8. State $13. City $15. Population ;
datalines;
Alaska Alaska Anchorage 291826
Atlantic Puerto Rico San Antonio 1327407
Atlantic Puerto Rico San Juan 395326
Central Minnesota Minneapolis 382578
Central Texas Dallas 1197816
Eastern Florida Miami 399457
Eastern Maryland Baltimore 620961
Eastern Michigan Detroit 713777
Eastern New York New York 8175133
Mountain Arizona Phoenix 1445631
Mountain Colorado Denver 600158
Mountain Texas El Paso 649121
Pacific California Los Angeles 3792621
Pacific California Sacramento 466488
Pacific California San Francisco 805235
Pacific Nevada Las Vegas 583756
;
run;
proc sort data=us_cities;
by timezone state;
run;
data total_pop;
set us_cities;
by timezone state;
if first.timezone then cumpop=0;
cumpop+population;
run;
/* OR*/
proc report data=us_cities;
column Timezone State City Population ;
define population/analysis;
rbreak after/summarize;
compute after;
timezone='Total';
endcomp;
run;
/*OR */
proc sql;
create table cum_pop as
select a.timezone ,a.state,a.city ,a.population,sum(a.population)as cum_population
from us_cities as a
group by timezone ,state,city;
quit;
Required output
TIMEZONE | STATE | CITY | POPULATION | CUMUL_POP |
Alaska | Alaska | Anchorage | 291826 | 291826 |
Alaska | Alaska | Total | 291826 | 291826 |
Alaska | Total |
| 291826 | 291826 |
Atlantic | Puerto Rico | San Antonio | 1327407 | 1619233 |
Atlantic | Puerto Rico | San Juan | 395326 | 2014559 |
Atlantic | Puerto Rico | Total | 1722733 | 2014559 |
Atlantic | Total |
| 1722733 | 2014559 |
Central | Minnesota | Minneapolis | 382578 | 2397137 |
Central | Minnesota | Total | 382578 | 2397137 |
Central | Texas | Dallas | 1197816 | 3594953 |
Central | Texas | Total | 1197816 | 3594953 |
Central | Total |
| 1580394 | 3594953 |
Eastern | Florida | Miami | 399457 | 3994410 |
Eastern | Florida | Total | 399457 | 3994410 |
Eastern | Maryland | Baltimore | 620961 | 4615371 |
Eastern | Maryland | Total | 620961 | 4615371 |
Eastern | Michigan | Detroit | 713777 | 5329148 |
Eastern | Michigan | Total | 713777 | 5329148 |
Eastern | New York | New York | 8175133 | 13504281 |
Eastern | New York | Total | 8175133 | 13504281 |
Eastern | Total |
| 9909328 | 13504281 |
Mountain | Arizona | Phoenix | 1445631 | 14949912 |
Mountain | Arizona | Total | 1445631 | 14949912 |
Mountain | Colorado | Denver | 600158 | 15550070 |
Mountain | Colorado | Total | 600158 | 15550070 |
Mountain | Texas | El Paso | 649121 | 16199191 |
Mountain | Texas | Total | 649121 | 16199191 |
Mountain | Total |
| 2694910 | 16199191 |
Pacific | California | Los Angeles | 3792621 | 19991812 |
Pacific | California | Sacramento | 466488 | 20458300 |
Pacific | California | San Francisco | 805235 | 21263535 |
Pacific | California | Total | 5064344 | 21263535 |
Pacific | Nevada | Las Vegas | 583756 | 21847291 |
Pacific | Nevada | Total | 583756 | 21847291 |
Pacific | Total |
| 5648100 | 21847291 |
data US_Cities;
LENGTH Timezone $ 8 State $ 13 City $ 15 Population 8 ;
infile datalines delimiter='#';
input Timezone $ State $ City $ Population ;
Timezone=strip(Timezone);
State =strip(State);
City =strip(City);
datalines;
Alaska #Alaska #Anchorage #291826
Atlantic #Puerto Rico #San Antonio #1327407
Atlantic #Puerto Rico #San Juan #395326
Central #Minnesota #Minneapolis #382578
Central #Texas #Dallas #1197816
Eastern #Florida #Miami #399457
Eastern #Maryland #Baltimore #620961
Eastern #Michigan #Detroit #713777
Eastern #New York #New York #8175133
Mountain #Arizona #Phoenix #1445631
Mountain #Colorado #Denver #600158
Mountain #Texas #El Paso #649121
Pacific #California #Los Angeles #3792621
Pacific #California #Sacramento #466488
Pacific #California #San Francisco #805235
Pacific #Nevada #Las Vegas #583756
;
run;
proc sort data=us_cities;
by Timezone State City;
run;
data total_pop;
set us_cities;
by Timezone State City;
*retain cumul_ALL 0;
*cumul_ALL = cumul_ALL + population;
*if first.timezone then cumul_timezone=0;
*cumul_timezone+population;
*if first.state then cumul_state=0;
*cumul_state+population;
LABEL
population='population'
cumul_timezone='Cumulative#Number#Timezone'run;
cumul_state='Cumulative#Number#State'
cumul_ALL='Cumulative#Number#ALL'
;
format Population cumul_timezone cumul_state cumul_ALL comma11.;
run;
proc report data=total_pop split='#';
column Timezone State City Population
/* cumul_timezone cumul_state cumul_ALL*/ ;
define Timezone / order;
define State / order;
define City / display;
define Population / format=comma11. ;
*define cumul_timezone / format=comma11. ;
*define cumul_state / format=comma11. ;
*define cumul_ALL / format=comma11. ;
break after Timezone / summarize style=[color=green font_weight=bold];
break after State / summarize style=[color=magenta font_weight=bold];
rbreak after / summarize style=[color=red font_weight=bold];
title 'Total Population';
title2 "for &sysdate.";
run;
/* end of program */
Hello,
Much easier with a PROC REPORT or a PROC TABULATE but I thought you were looking after a data step solution:
data US_Cities;
LENGTH Timezone $ 8 State $ 13 City $ 15 Population 8 ;
infile datalines delimiter='#';
input Timezone $ State $ City $ Population ;
Timezone=strip(Timezone);
State =strip(State);
City =strip(City);
datalines;
Alaska #Alaska #Anchorage #291826
Atlantic #Puerto Rico #San Antonio #1327407
Atlantic #Puerto Rico #San Juan #395326
Central #Minnesota #Minneapolis #382578
Central #Texas #Dallas #1197816
Eastern #Florida #Miami #399457
Eastern #Maryland #Baltimore #620961
Eastern #Michigan #Detroit #713777
Eastern #New York #New York #8175133
Mountain #Arizona #Phoenix #1445631
Mountain #Colorado #Denver #600158
Mountain #Texas #El Paso #649121
Pacific #California #Los Angeles #3792621
Pacific #California #Sacramento #466488
Pacific #California #San Francisco #805235
Pacific #Nevada #Las Vegas #583756
;
run;
proc sort data=us_cities;
by timezone state;
run;
data total_pop;
set us_cities;
by timezone state;
retain cumul_ALL 0;
cumul_ALL = cumul_ALL + population;
if first.timezone then cumul_timezone=0;
cumul_timezone+population;
if first.state then cumul_state=0;
cumul_state+population;
output;
if last.state then do;
City='TOTAL'; output;
end;
if last.timezone then do;
State='TOTAL'; City=''; output;
end;
run;
/* end of program */
Koen
could you please give both proc report and proc tablulate solutions
data US_Cities;
LENGTH Timezone $ 8 State $ 13 City $ 15 Population 8 ;
infile datalines delimiter='#';
input Timezone $ State $ City $ Population ;
Timezone=strip(Timezone);
State =strip(State);
City =strip(City);
datalines;
Alaska #Alaska #Anchorage #291826
Atlantic #Puerto Rico #San Antonio #1327407
Atlantic #Puerto Rico #San Juan #395326
Central #Minnesota #Minneapolis #382578
Central #Texas #Dallas #1197816
Eastern #Florida #Miami #399457
Eastern #Maryland #Baltimore #620961
Eastern #Michigan #Detroit #713777
Eastern #New York #New York #8175133
Mountain #Arizona #Phoenix #1445631
Mountain #Colorado #Denver #600158
Mountain #Texas #El Paso #649121
Pacific #California #Los Angeles #3792621
Pacific #California #Sacramento #466488
Pacific #California #San Francisco #805235
Pacific #Nevada #Las Vegas #583756
;
run;
proc sort data=us_cities;
by Timezone State City;
run;
data total_pop;
set us_cities;
by Timezone State City;
*retain cumul_ALL 0;
*cumul_ALL = cumul_ALL + population;
*if first.timezone then cumul_timezone=0;
*cumul_timezone+population;
*if first.state then cumul_state=0;
*cumul_state+population;
LABEL
population='population'
cumul_timezone='Cumulative#Number#Timezone'run;
cumul_state='Cumulative#Number#State'
cumul_ALL='Cumulative#Number#ALL'
;
format Population cumul_timezone cumul_state cumul_ALL comma11.;
run;
proc report data=total_pop split='#';
column Timezone State City Population
/* cumul_timezone cumul_state cumul_ALL*/ ;
define Timezone / order;
define State / order;
define City / display;
define Population / format=comma11. ;
*define cumul_timezone / format=comma11. ;
*define cumul_state / format=comma11. ;
*define cumul_ALL / format=comma11. ;
break after Timezone / summarize style=[color=green font_weight=bold];
break after State / summarize style=[color=magenta font_weight=bold];
rbreak after / summarize style=[color=red font_weight=bold];
title 'Total Population';
title2 "for &sysdate.";
run;
/* end of program */
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.