I'm guessing that this post is about neighborhoods in Cincinnati, Ohio. I used American FactFinder on the Census web site to find data from the 2010 Census with ages up to 110.
The files I downloaded from American FactFinder for Hamilton County, Ohio are attached. Cincinnati is located in Hamilton County, but not all the Hamilton County records are within Cincinnati.
There are 871 variables when you import the DEC_10_SF1_QTP2.csv. I dropped the variables that I didn't use, and renamed the rest to include the age categories.
* metadata CSV in download from American Fact Finder explains
* what the variable names mean. using the QT-P2 table from the
* 2010 Census for Hamilton County Ohio in this example. ;
FILENAME REFFILE1 '/folders/myfolders/Hamilton County Ohio 2010 age data/DEC_10_SF1_QTP2_metadata.csv';
data WORK.VARIABLE_LIST ;
%let _EFIERR_ = 0; /* set the ERROR detection macro variable */
infile REFFILE1 delimiter = ',' MISSOVER DSD ;
informat VAR1 $17. ;
informat VAR2 $79. ;
format VAR1 $17. ;
format VAR2 $79. ;
input
VAR1 $
VAR2 $
;
if _ERROR_ then call symputx('_EFIERR_',1); /* set ERROR detection macro variable */
run;
* table from American Fact Finder has lots of data.
* HD03_Snn is males per 100 females for various age groups.
* SUBHD0101_Snn has the number for both sexes for various age groups.
* SUBHD0102_Snn is the number of males for various age groups.
* SUBHD0103_Snn is the number of females for various age groups.
* SUBHD0201_Snn is the percent of the total population for various age groups.
* SUBHD0202_Snn is the percent male of the total population for various age groups.
* SUBHD0203_Snn is the percent female of the total population for various age groups.
* Assume we are only interested in some of the SUBHD0101_Snn variables ;
* want to rename the population counts ;
proc sql noprint;
create table age_vars as
select var1,
var2
from variable_list
where var1 contains 'SUBHD0101' and
var1 not in ('SUBHD0101_S02',
'SUBHD0101_S08',
'SUBHD0101_S14',
'SUBHD0101_S20',
'SUBHD0101_S26',
'SUBHD0101_S32',
'SUBHD0101_S38',
'SUBHD0101_S44',
'SUBHD0101_S50',
'SUBHD0101_S56',
'SUBHD0101_S62',
'SUBHD0101_S68',
'SUBHD0101_S74',
'SUBHD0101_S80',
'SUBHD0101_S86',
'SUBHD0101_S92',
'SUBHD0101_S98',
'SUBHD0101_S104',
'SUBHD0101_S110',
'SUBHD0101_S116');
select var1
into :keep_list separated by ' '
from variable_list
where var1 contains 'SUBHD0101' and
var1 not in ('SUBHD0101_S02',
'SUBHD0101_S08',
'SUBHD0101_S14',
'SUBHD0101_S20',
'SUBHD0101_S26',
'SUBHD0101_S32',
'SUBHD0101_S38',
'SUBHD0101_S44',
'SUBHD0101_S50',
'SUBHD0101_S56',
'SUBHD0101_S62',
'SUBHD0101_S68',
'SUBHD0101_S74',
'SUBHD0101_S80',
'SUBHD0101_S86',
'SUBHD0101_S92',
'SUBHD0101_S98',
'SUBHD0101_S104',
'SUBHD0101_S110',
'SUBHD0101_S116');
quit;
data age_vars;
length new_name $ 32;
set age_vars;
if var1='SUBHD0101_S01' then new_name='tot_pop';
else new_name='tot_pop'||tranwrd(trim(scan(var2,-1,'-')),' ','_');
run;
proc sql noprint;
select var1||" = "||new_name
into :rename_count_list separated by ' '
from age_vars;
quit;
* annotation CSV in download from American Fact Finder has geo_id, geo_id2, and
* geo_display_label, along with information about all the other variables. row 2 of
* the CSV has information that we don't want, so start on row 3. will have to keep
* and rename variables. we can easily get the census tract from geo_display_label. ;
FILENAME REFFILE2 '/folders/myfolders/Hamilton County Ohio 2010 age data/DEC_10_SF1_QTP2_ann.csv';
data geo_info;
%let _EFIERR_ = 0; /* set the ERROR detection macro variable */
infile REFFILE2 delimiter = ',' MISSOVER DSD firstobs=3 ;
informat geo_id $20. ;
informat geo_id2 $11. ;
informat geo_display_label $42. ;
format geo_id $20. ;
format geo_id2 $11. ;
format geo_display_label $42. ;
input
geo_id $
geo_id2 $
geo_display_label $
;
if _ERROR_ then call symputx('_EFIERR_',1); /* set ERROR detection macro variable */
run;
* data step below assumes that all the neighborhoods in Cincinnati have been listed.
* drop census tracts in Hamilton County but not in Cincinnati ;
data geo_info_cincy_neighborhoods;
length Neighborhood $ 29;
set geo_info;
censustract=input(tranwrd((scan(geo_display_label,1,',')),'Census Tract ',''),best.);
if censustract in(34,66,67,68,69) then Neighborhood = "Avondale ";
if censustract in(63,64) then Neighborhood = "Bond Hill";
if censustract in(45) then neighborhood = "California";
if censustract in(28) then neighborhood = "Camp Washington";
if censustract in(61) then neighborhood = "Carthage";
if censustract in(6,7) then neighborhood = "Downtown";
if censustract in(70,71,72) then neighborhood = "Clifton";
if censustract in(81,82.01,82.02,84,111,218.01,221.02) then neighborhood = "College Hill";
if censustract in(32,33) then neighborhood = "Corryville";
if censustract in(43,44) then neighborhood = "East End";
if censustract in(92,93,94,95,96) then neighborhood = "East Price Hill";
if censustract in(20,42) then neighborhood = "East Walnut Hills";
if censustract in(38,39,40) then neighborhood = "Evanston";
if censustract in(41) then neighborhood = "Evanston/East Walnut Hills";
if censustract in(25,26,27,29,30) then neighborhood = "CUF";
if censustract in(85.02) then neighborhood = "Roll Hill";
if censustract in(60,222) then neighborhood = "Hartwell";
if censustract in(49,50,51) then neighborhood = "Hyde Park";
if censustract in(58) then neighborhood = "Kennedy Heights";
if censustract in(47.02) then neighborhood = "Linwood";
if censustract in(91, 1) then neighborhood = "Lower Price Hill/Queensgate";
if censustract in(55,56,108,242,244) then neighborhood = "Madisonville";
if censustract in(12, 13) then neighborhood = "Mt. Adams";
if censustract in(83,85.01,208.11) then neighborhood = "Mt. Airy";
if censustract in(18,22,23) then neighborhood = "Mt. Auburn";
if censustract in(48) then neighborhood = "Mt. Lookout";
if censustract in(47.01) then neighborhood = "Mt. Lookout/Columbia Tusculum";
if censustract in(46.01,46.02,46.03,250.01) then neighborhood = "Mt. Washington";
if censustract in(65) then neighborhood = "North Avondale/Paddock Hills";
if censustract in(86.01) then neighborhood = "North Fairmount/English Woods";
if censustract in(74,75,78,79) then neighborhood = "Northside";
if censustract in(52,53,54,253) then neighborhood = "Oakley";
if censustract in(9,10,11,16,17) then neighborhood = "Over-the-Rhine";
if censustract in(57.01,57.02,59) then neighborhood = "Pleasant Ridge";
if censustract in(104) then neighborhood = "Riverside-Sayler Park";
if censustract in(62.01,62.02,110,232.01) then neighborhood = "Roselawn";
if censustract in(105,106) then neighborhood = "Sayler Park";
if censustract in(103) then neighborhood = "Sedamsville/Riverside";
if censustract in(73) then neighborhood = "Spring Grove Village";
if censustract in(77) then neighborhood = "South Cumminsville/Millvale";
if censustract in(87,89) then neighborhood = "South Fairmount";
if censustract in(19,21,35,36,37) then neighborhood = "Walnut Hills";
if censustract in(2,3.01,3.02,4,8,14,15) then neighborhood = "West End";
if censustract in(97,98,99.01,99.02,107,212.02,214.01) then neighborhood = "West Price Hill";
if censustract in(88,100.01,100.02,101,102.01,102.02,109,209.01,209.02) then neighborhood = "Westwood/East Westwood";
if censustract in(80) then neighborhood = "Winton Hills";
if neighborhood ne '';
run;
FILENAME REFFILE3 '/folders/myfolders/Hamilton County Ohio 2010 age data/DEC_10_SF1_QTP2.csv';
PROC IMPORT DATAFILE=REFFILE3
DBMS=CSV
OUT=population
replace;
GETNAMES=YES;
RUN;
data population(rename=(&rename_count_list));
set population(keep=geo_id &keep_list);
run;
proc sql;
create table cincy_neighborhood_pop as
select a.neighborhood,
a.censustract,
b.*
from geo_info_cincy_neighborhoods as a LEFT JOIN
population as b
on a.geo_id=b.geo_id
order by geo_id;
quit;
proc print data=cincy_neighborhood_pop split='_';
where censustract in (45, 218.01, 221.02, 222, 242,
244, 208.11, 250.01, 253, 232.01,
99.01, 99.02, 107, 212.02, 214.01,
209.01, 209.02);
title 'These are partial tracts in neighborhoods -- need to split data to avoid overcount';
run;
proc sql noprint;
select name
into :pop_counts_list separated by ' '
from sashelp.vcolumn
where libname='WORK' and
memname='CINCY_NEIGHBORHOOD_POP' and
upcase(name) contains 'TOT_POP';
quit;
%macro set_up_array;
array pop_counts [*] &pop_counts_list;
%mend set_up_array;
options mprint;
data cincy_partial_tracts_split_pop(drop=i);
%set_up_array ;
set cincy_neighborhood_pop;
if censustract in (45, 218.01, 221.02, 222, 242,
244, 208.11, 250.01, 253, 232.01,
99.01, 99.02, 107, 212.02, 214.01,
209.01, 209.02);
do i=1 to dim(pop_counts);
pop_counts[i] = round(pop_counts[i]/2);
end;
run;
proc print data=cincy_partial_tracts_split_pop split='_';
var neighborhood censustract geo_id &pop_counts_list ;
title 'After halving population counts in partial tracts';
run;
title;
data corrected_cincy_pop_counts;
update cincy_neighborhood_pop
cincy_partial_tracts_split_pop;
by censustract;
run;
proc sql;
select sum(tot_pop) as sum_before label=
'Total population in Cincinnati neighborhoods before splitting partial census tracts'
format=comma10.
from cincy_neighborhood_pop;
quit;
proc sql;
select sum(tot_pop) as sum_after label=
'Total population in Cincinnati neighborhoods AFTER splitting partial census tracts'
format=comma10.
from corrected_cincy_pop_counts;
quit;
The screen shot below from the log shows the array with the population counts.
Array with all the population counts
... View more