/* This file contains PostGIS code to create block-level estimates for the number of housing units, population and households for one county: Washington DC, five-digit FIPS = '11001'. Since there are over 3,200 county or county-level equivalents in the 56 states/district/ territories, python is used to automate the calculations. The code assumes the presence of several tables, using five-digit FIPS for counties or two- digit FIPS for states (these examples assume the county FIPS = 11001 and state FIPS = 11 for DC): osm data for the state/district/territory in osm.osm_11_polygon and osm.osm_11_line 2010 Census block shapes tl_2010_11_tabblock10.zip --> tiger_blocks.block_01_2010 2010 Census edge shapes tl_2010_01011_edges.zip --> tiger.edges_11001_2010 2015 Census edge shapes tl_2015_01011_edges.zip --> tiger.edges_11001_2015 Table with county-level estimates from US Census and other sources by year --> counts.county_pophu_2010_2015 Table with block-level 2010 US Census counts --> us2010 Please see documentation for more complete description of the sources and code. */ -- Create table with area ineligible for new locations ("park buffer") DROP TABLE IF EXISTS osm.parkarea_11; CREATE TABLE osm.parkarea_11 AS ( SELECT osm_id, access, "addr:housename", "addr:housenumber", "addr:interpolation", admin_level , aerialway, aeroway, amenity , area, barrier, bicycle, brand, bridge, boundary, building, construction, covered, culvert, cutting, denomination , disused, embankment, foot, "generator:source", harbour, highway, historic, horse, intermittent, junction, landuse , layer, leisure, lock, man_made , military, motorcar, "name", "natural", office, oneway, operator, place, population , power, power_source , public_transport, railway, ref, religion, route, service, shop, sport, surface, toll, tourism , "tower:type", tracktype, tunnel , water, waterway, wetland, width, wood, z_order, way_area, way FROM osm.osm_11_polygon WHERE leisure IN ('park', 'nature_reserve') OR boundary='national_park' OR landuse IN ('forest','cemetery','golf_course','landfill','military') OR military IS NOT NULL OR water IS NOT NULL OR wetland IS NOT NULL OR waterway='riverbank' OR tourism='zoo' OR aeroway='aerodrome' OR amenity IN ('university','college','hospital') OR "natural" IN ('water','wetland') OR "natural" IN ('wetland') UNION SELECT osm_id, access, "addr:housename", "addr:housenumber", "addr:interpolation", admin_level , aerialway, aeroway, amenity , area, barrier, bicycle, brand, bridge, boundary, building, construction, covered, culvert, cutting, denomination , disused, embankment, foot, "generator:source", harbour, highway, historic, horse, intermittent, junction, landuse , layer, leisure, lock, man_made , military, motorcar, "name", "natural", office, oneway, operator, place, population , power, power_source , public_transport, railway, ref, religion, route, service, shop, sport, surface, toll, tourism , "tower:type", tracktype, tunnel , water, waterway, wetland, width, wood, z_order , ST_Area(ST_MakePolygon(way)) as way_area, ST_MakePolygon(way) as way FROM osm.osm_11_line WHERE boundary='national_park' AND ST_ISclosed(way)); DROP TABLE IF EXISTS osm.parkarea_11_buffer; CREATE TABLE osm.parkarea_11_buffer AS (SELECT osm_id, ST_Buffer(way,30) as geom FROM osm.parkarea_11); CREATE INDEX park_11_geom_gist ON osm.parkarea_11_buffer USING gist(geom); SELECT UpdateGeometrySRID('osm','parkarea_11_buffer','geom','900913'); INSERT INTO osm.parkarea_11_buffer (SELECT geoid10::bigint as osm_id, ST_Transform(geom,900913) as geom FROM tiger_blocks.block_11_2010 WHERE aland10=0) -- Create state-level tables for all edges in a state (to allow for changes to county definitions) DROP TABLE IF EXISTS tiger.edges_roads_11_2010; CREATE TABLE tiger.edges_roads_11_2010 ( gid int, eligible_id int, county_fips character(5), tlid numeric(10,0), mtfcc varchar (5), fullname varchar(100), geom geometry); COMMENT ON TABLE tiger.edges_roads_11_2010 IS 'created_for_count_distro_in_900913_srid '; CREATE INDEX edges_roads_11_2010geom_gist ON tiger.edges_roads_11_2010 USING gist(geom);SELECT UpdateGeometrySRID('tiger','edges_roads_11_2010','geom','900913'); DROP TABLE IF EXISTS tiger.edges_roads_11_2015; CREATE TABLE tiger.edges_roads_11_2015 ( gid int, eligible_id int, county_fips character(5), tlid numeric(10,0), mtfcc varchar (5), fullname varchar(100), geom geometry); COMMENT ON TABLE tiger.edges_roads_11_2015 IS 'created_for_count_distro_in_900913_srid '; CREATE INDEX edges_roads_11_2015geom_gist ON tiger.edges_roads_11_2015 USING gist(geom);SELECT UpdateGeometrySRID('tiger','edges_roads_11_2015','geom','900913'); -- Insert data into two state-level tables created above DROP TABLE IF EXISTS tiger.temp_roads_2010; CREATE TABLE tiger.temp_roads_2010 AS ( SELECT gid, statefp||countyfp as county_fips, tlid, mtfcc, fullname, ST_Transform(geom,900913) as geom FROM tiger.edges_11001_2010 WHERE roadflg='Y'); CREATE INDEX temp_roads_geom_gist2010 ON tiger.temp_roads_2010 USING gist(geom); COMMIT; INSERT INTO tiger.edges_roads_11_2010 ( SELECT gid, CASE WHEN mtfcc IN ('S1200','S1400') AND park.gid IS NULL THEN row_number() OVER (order by CASE WHEN mtfcc IN ('S1200','S1400') and park.gid IS NULL THEN 1 ELSE 2 END) END as eligible_id , county_fips, tlid, mtfcc, fullname, geom FROM tiger.temp_roads_2010 LEFT JOIN (SELECT gid FROM tiger.temp_roads_2010 road JOIN osm.parkarea_10_buffer area ON ST_Intersects(area.geom, road.geom)=true WHERE ST_Length(ST_Intersection(area.geom, road.geom))/ST_Length(road.geom)>.25 GROUP BY gid) park USING (gid) ) DROP TABLE IF EXISTS tiger.temp_roads_2015; CREATE TABLE tiger.temp_roads_2015 AS ( SELECT gid, statefp||countyfp as county_fips, tlid, mtfcc, fullname, ST_Transform(geom,900913) as geom FROM tiger.edges_11001_2015 WHERE roadflg='Y'); CREATE INDEX temp_roads_geom_gist2015 ON tiger.temp_roads_2015 USING gist(geom); COMMIT; INSERT INTO tiger.edges_roads_11_2015 ( SELECT gid, CASE WHEN mtfcc IN ('S1200','S1400') AND park.gid IS NULL THEN row_number() OVER (order by CASE WHEN mtfcc IN ('S1200','S1400') and park.gid IS NULL THEN 1 ELSE 2 END) END as eligible_id , county_fips, tlid, mtfcc, fullname, geom FROM tiger.temp_roads_2015 LEFT JOIN (SELECT gid FROM tiger.temp_roads_2015 road JOIN osm.parkarea_10_buffer area ON ST_Intersects(area.geom, road.geom)=true WHERE ST_Length(ST_Intersection(area.geom, road.geom))/ST_Length(road.geom)>.25 GROUP BY gid) park USING (gid) ) -- Create buffered version of block table (to avoid problems where a road intersects a block boundary) DROP TABLE IF EXISTS tiger_blocks.block_11_buf_2010; CREATE TABLE tiger_blocks.block_11_buf_2010 ( gid integer NOT NULL PRIMARY KEY, block_fips character varying(15), geom geometry) WITH (OIDS=TRUE); CREATE INDEX tiger_blocks_block_11buf_geoid10_gist ON tiger_blocks.block_11_buf_2010 USING btree (block_fips); CREATE INDEX tiger_blocks_block_11buf_geom_gist ON tiger_blocks.block_11_buf_2010 USING gist (geom); CREATE INDEX tiger_blocks_block_11buf_gid_gist ON tiger_blocks.block_11_buf_2010 USING btree (gid); SELECT UpdateGeometrySRID('tiger_blocks','block_11_buf_2010','geom','900913'); INSERT INTO tiger_blocks.block_11_buf_2010 (SELECT gid,geoid10 as block_fips,ST_Buffer(ST_Transform(geom,900913),0.1) as geom FROM census2010.block_11); -- Create table showing roads associated with each block in each county. -- Note that this code uses 2015 county definitions (by using 2015 roads for the county) DROP TABLE IF EXISTS counts.block_roads_11001_2015; CREATE TABLE counts.block_roads_11001_2015 AS ( SELECT a.gid, eligible_id, block_fips, mtfcc, tlid, ST_Intersection(a.geom,b.geom) as geom FROM tiger.edges_roads_11_2015 a JOIN tiger_blocks.block_11_buf_2010 b ON ST_Intersects(a.geom,b.geom) WHERE left(block_fips,5)='11001' AND ST_GeometryType(a.geom)!='ST_Point'); COMMIT; SELECT UpdateGeometrySRID('counts','block_roads_11001_2015','geom','900913'); DROP TABLE IF EXISTS counts.block_roads_11001_2010; CREATE TABLE counts.block_roads_11001_2010 AS ( SELECT a.gid, eligible_id, block_fips, mtfcc, tlid, ST_Intersection(a.geom,b.geom) as geom FROM tiger.edges_roads_11_2010 a JOIN tiger_blocks.block_11_buf_2010 b ON ST_Intersects(a.geom,b.geom) WHERE block_fips IN (SELECT block_fips FROM counts.block_roads_11001_2015 GROUP BY block_fips) AND ST_GeometryType(a.geom)!='ST_Point'); COMMIT; SELECT UpdateGeometrySRID('counts','block_roads_11001_2010','geom','900913'); COMMIT; -- Create table to house block-level changes in housing unit and population counts DROP TABLE IF EXISTS counts.new_counts; CREATE TABLE counts.new_counts(block_fips character(15), new_road_hu numeric, new_road_pop numeric, removed_hu numeric, removed_pop numeric, added_hu numeric, added_pop numeric, date_created date); CREATE INDEX new_counts_block_idx ON counts.new_counts USING btree(block_fips); -- Identify blocks with new roads based and add HU/POP based on amount of new roads and starting HU/POP INSERT INTO counts.new_counts (block_fips, new_road_hu, new_road_pop, date_created) ( SELECT block_fips , CASE WHEN (hu2010)<5 THEN least(29683.0,round((sum(CASE WHEN r_end.eligible_id IS NOT NULL THEN ST_Length(r_end.geom) ELSE 0 END)-sum(CASE WHEN r_start.eligible_id IS NOT NULL THEN ST_Length(r_start.geom) ELSE 0 END))/30)) WHEN (sum(CASE WHEN r_end.eligible_id IS NOT NULL THEN ST_Length(r_end.geom) ELSE 0 END)=sum(CASE WHEN r_start.eligible_id IS NOT NULL THEN ST_Length(r_start.geom) ELSE 0 END)) THEN 0 ELSE least(29683.0,count(r_end.eligible_id)-count(r_start.eligible_id)) END as new_hu , CASE WHEN (pop2010)<10 THEN least(60513.0,round((sum(CASE WHEN r_end.eligible_id IS NOT NULL THEN ST_Length(r_end.geom) ELSE 0 END)-sum(CASE WHEN r_start.eligible_id IS NOT NULL THEN ST_Length(r_start.geom) ELSE 0 END))/15)) WHEN (sum(CASE WHEN r_end.eligible_id IS NOT NULL THEN ST_Length(r_end.geom) ELSE 0 END)=sum(CASE WHEN r_start.eligible_id IS NOT NULL THEN ST_Length(r_start.geom) ELSE 0 END)) THEN 0 ELSE least(60513.0,2*(count(r_end.eligible_id)-count(r_start.eligible_id))) END as new_pop , CURRENT_DATE as date_created FROM counts.block_roads_11001_2015 r_end LEFT JOIN counts.block_roads_11001_2010 r_start USING (tlid,block_fips) LEFT JOIN us2010 USING (block_fips) WHERE r_start.block_fips IS NULL AND left(block_fips,5)='11001' AND block_fips IN ( SELECT block_fips FROM (SELECT block_fips, sum(ST_Length(geom)) as road_len, sum(CASE WHEN mtfcc='S1400' THEN ST_Length(geom) ELSE 0 END) as el_len FROM counts.block_roads_11001_2015 GROUP BY block_fips) b LEFT JOIN (SELECT block_fips, sum(ST_Length(geom)) as road_len, sum(CASE WHEN mtfcc='S1400' THEN ST_Length(geom) ELSE 0 END) as el_len FROM counts.block_roads_11001_2010 GROUP BY block_fips) a USING (block_fips) LEFT JOIN (SELECT block_fips, ST_Perimeter(geom) as block_circ, ST_XMax(geom) - ST_XMin(geom) as a, ST_YMax(geom)-ST_YMin(geom) as b FROM tiger_blocks.block_11_meter_2010) c USING (block_fips) WHERE b.road_len-a.road_len>10 AND b.el_len-a.el_len>10 AND (b.road_len-a.road_len)/(b.el_len-a.el_len)>.50 AND ((b.el_len-a.el_len)/block_circ)>=1.1*sqrt(a^2+b^2)/(2*a+2*b) GROUP BY block_fips) GROUP BY block_fips, hu2010, pop2010 ); COMMIT; -- Find change in HU and POP in this county (11001) between 2010 and 2015 SELECT (huest_2015 - huest_2010)::int as delta_hu FROM counts.county_pophu_2010_2015 WHERE county_fips='11001' SELECT (popest_2015 - popest_2010)::int as delta_pop FROM counts.county_pophu_2010_2015 WHERE county_fips='11001' /* The code above returns delta_hu=9345 and delta_pop=67102. However, there are a number of HU and POP added to the area in the "new road" code above: 58 HU and 116 POP. Therefore the python code passes the code below the net value of HU and POP to be placed: 9287 HU (=9345-57) and 66986 POP (=67102-116). An alternative to using python would be to replace 9287 in the code below with: (SELECT (huest_2015 - huest_2010)::int as delta_hu FROM counts.county_pophu_2010_2015 WHERE county_fips='11001') - (SELECT sum(new_road_hu) FROM counts.new_counts WHERE left(block_fips,5)='11001') and replace 66986 with: (SELECT (popest_2015 - popest_2010)::int as delta_pop FROM counts.county_pophu_2010_2015 WHERE county_fips='11001') - (SELECT sum(new_road_pop) FROM counts.new_counts WHERE left(block_fips,5)='11001') */ -- Find points and associated blocks to add HU in counties where HU count has grown DROP TABLE IF EXISTS counts.onroad_points_hu_temp; CREATE TABLE counts.onroad_points_hu_temp AS ( SELECT row_number() OVER (PARTITION BY 1) as point_id, tlid, geom , ST_Line_Interpolate_Point(ST_LineMerge(geom),random()) as point, ST_Length(geom) as dist , ST_X(ST_StartPoint(ST_LineMerge(geom))) as x1, ST_Y(ST_StartPoint(ST_LineMerge(geom))) as y1 ,ST_X(ST_ENDPoint(ST_LineMerge(geom))) as x2, ST_Y(ST_ENDPoint(ST_LineMerge(geom))) as y2 FROM tiger.edges_roads_11_2015 RIGHT JOIN ( (SELECT generate_series(1,9287::int), ceil(random()*(SELECT max(eligible_id) FROM tiger.edges_roads_11_2015 WHERE county_fips='11001')) random_id)) id_generator ON eligible_id = random_id WHERE county_fips='11001'); SELECT UpdateGeometrySRID('counts','onroad_points_hu_temp','point','900913'); DROP TABLE IF EXISTS counts.offset_points_hu_11001_2015; CREATE TABLE counts.offset_points_hu_11001_2015 AS ( SELECT point_id, ST_Translate(point, (SELECT pos/abs(pos) FROM (SELECT random()-0.5 as pos) foo) * 5 * (y2-y1)/dist , (SELECT pos/abs(pos) FROM (SELECT random()-0.5 as pos) foo) * 5 * (x2-x1)/dist) as point FROM counts.onroad_points_hu_temp); SELECT UpdateGeometrySRID('counts','offset_points_hu_11001_2015','point','900913'); INSERT INTO counts.new_counts (block_fips,added_hu, date_created) (SELECT block_fips, count(*), CURRENT_DATE FROM (SELECT block_fips, point_id FROM counts.offset_points_hu_11001_2015,tiger_blocks.block_11_meter_2010 WHERE ST_Contains(geom,point) AND left(block_fips,5)='11001') foo GROUP BY block_fips); -- Find points and associated blocks to add POP in counties where POP count has grown DROP TABLE IF EXISTS counts.onroad_points_pop_temp; CREATE TABLE counts.onroad_points_pop_temp AS ( SELECT row_number() OVER (PARTITION BY 1) as point_id, tlid, geom , ST_Line_Interpolate_Point(ST_LineMerge(geom),random()) as point, ST_Length(geom) as dist , ST_X(ST_StartPoint(ST_LineMerge(geom))) as x1, ST_Y(ST_StartPoint(ST_LineMerge(geom))) as y1 , ST_X(ST_ENDPoint(ST_LineMerge(geom))) as x2, ST_Y(ST_ENDPoint(ST_LineMerge(geom))) as y2 FROM tiger.edges_roads_11_2015 RIGHT JOIN ( (SELECT generate_series(1,66986::int), ceil(random()*(SELECT max(eligible_id) FROM tiger.edges_roads_11_2015 WHERE county_fips='11001')) random_id)) id_generator ON eligible_id = random_id WHERE county_fips='11001'); SELECT UpdateGeometrySRID('counts','onroad_points_pop_temp','point','900913'); DROP TABLE IF EXISTS counts.offset_points_pop_11001_2015; CREATE TABLE counts.offset_points_pop_11001_2015 AS ( SELECT point_id, ST_Translate(point, (SELECT pos/abs(pos) FROM (SELECT random()-0.5 as pos) foo) * 5 * (y2-y1)/dist , (SELECT pos/abs(pos) FROM (SELECT random()-0.5 as pos) foo) * 5 * (x2-x1)/dist) as point FROM counts.onroad_points_pop_temp); SELECT UpdateGeometrySRID('counts','offset_points_pop_11001_2015','point','900913'); INSERT INTO counts.new_counts (block_fips,added_pop, date_created) (SELECT block_fips, count(*), CURRENT_DATE FROM (SELECT block_fips, point_id FROM counts.offset_points_pop_11001_2015,tiger_blocks.block_11_meter_2010 WHERE ST_Contains(geom,point) AND left(block_fips,5)='11001') foo GROUP BY block_fips); -- Remove HU counts from counties where HU count has gotten smaller -- The code is shown here for illustrative purposes, assuming a reduction in HU count of 100 DROP TABLE IF EXISTS temp_numberfield; CREATE TABLE temp_numberfield (n int); INSERT INTO temp_numberfield (n) SELECT generate_series(1,abs(-100)+1); INSERT INTO counts.new_counts (block_fips, removed_hu, date_created) ( SELECT block_fips, -1*count(*) as removed_hu, CURRENT_DATE FROM (SELECT block_fips, '11001'::char(5) as county_fips, row_number() OVER (ORDER BY num.n DESC) rank FROM us2010 CROSS JOIN temp_numberfield num WHERE left(block_fips,5)='11001' AND n<=hu2010) start_counts FULL JOIN (SELECT '11001'::char(5) as county_fips, abs(-100) as reduction) delta USING (county_fips) WHERE start_counts.rank<=delta.reduction GROUP BY block_fips); -- Remove POP counts from counties where POP count has gotten smaller -- The code is shown here for illustrative purposes, assuming a reduction in POP count of 100 DROP TABLE IF EXISTS temp_numberfield; CREATE TABLE temp_numberfield (n int); INSERT INTO temp_numberfield (n) SELECT generate_series(1,abs(-100)+1); INSERT INTO counts.new_counts (block_fips, removed_pop, date_created) ( SELECT block_fips, -1*count(*) as removed_pop, CURRENT_DATE FROM (SELECT block_fips, '11001'::char(5) as county_fips, row_number() OVER (ORDER BY num.n DESC) rank FROM us2010 CROSS JOIN temp_numberfield num WHERE left(block_fips,5)='11001' AND n<=pop2010) start_counts FULL JOIN (SELECT '11001'::char(5) as county_fips, abs(-100) as reduction) delta USING (county_fips) WHERE start_counts.rank<=delta.reduction GROUP BY block_fips); -- Create table with county-average households (HH) per housing unit (HU) -- This is used to calculate the HH in a block that previously had none CREATE TABLE county_hh_per_hu as ( SELECT * FROM ( SELECT left(block_fips,5) as county_fips , CASE WHEN sum(hu2010) OVER (PARTITION BY left(block_fips,5))>0 THEN sum(hh2010) OVER (PARTITION BY left(block_fips,5)) / sum(hu2010) OVER (PARTITION BY left(block_fips,5)) ELSE (SELECT sum(hh2010)/sum(hh2010) FROM us2010) END as county_hh_ratio FROM us2010 ) foo GROUP BY county_fips, county_hh_ratio ) -- Create output table with block-level estimates of HU, HH and POP in 2015 CREATE TABLE us2015_temp AS ( SELECT stateabbr, block_fips, hu2010, hh2010, pop2010 , hu2010+delta_hu_2010to2015 as hu2015, greatest(0,hh2010+delta_hh_2010to2015) as hh2015, pop2010+delta_pop_2010to2015 as pop2015 FROM ( SELECT stateabbr, block_fips, hu2010, hh2010, pop2010 , sum(coalesce(new_road_hu,0))+sum(coalesce(removed_hu,0))+sum(coalesce(added_hu,0)) as delta_hu_2010to2015 , round(CASE WHEN hh2010=0 THEN county_hh_ratio ELSE hh2010::numeric/hu2010::numeric END * (sum(coalesce(new_road_hu,0))+sum(coalesce(removed_hu,0))+sum(coalesce(added_hu,0))),4) as delta_hh_2010to2015 , sum(coalesce(new_road_pop,0))+sum(coalesce(removed_pop,0))+sum(coalesce(added_pop,0)) as delta_pop_2010to2015 FROM us2010 LEFT JOIN counts.countdelta2010to2015_temp USING (block_fips) LEFT JOIN county_hh_per_hu ON left(block_fips,5)=county_fips WHERE left(block_fips,2)='11' GROUP BY block_fips, stateabbr, hu2010, hh2010, pop2010, county_hh_ratio ) foo)