]> git.openstreetmap.org Git - nominatim.git/blob - data-sources/us-tiger/tiger_address_convert.py
setup: delete invalid indices in create-search-indices step
[nominatim.git] / data-sources / us-tiger / tiger_address_convert.py
1 #!/usr/bin/python3
2 # Tiger road data to OSM conversion script
3 # Creates Karlsruhe-style address ways beside the main way
4 # based on the Massachusetts GIS script by christopher schmidt
5
6 #BUGS:
7 # On very tight curves, a loop may be generated in the address way.
8 # It would be nice if the ends of the address ways were not pulled back from dead ends
9
10
11 # Ways that include these mtfccs should not be uploaded
12 # H1100 Connector
13 # H3010 Stream/River
14 # H3013 Braided Stream
15 # H3020 Canal, Ditch or Aqueduct
16 # L4130 Point-to-Point Line
17 # L4140 Property/Parcel Line (Including PLSS)
18 # P0001 Nonvisible Linear Legal/Statistical Boundary
19 # P0002 Perennial Shoreline
20 # P0003 Intermittent Shoreline
21 # P0004 Other non-visible bounding Edge (e.g., Census water boundary, boundary of an areal feature)
22 ignoremtfcc = [ "H1100", "H3010", "H3013", "H3020", "L4130", "L4140", "P0001", "P0002", "P0003", "P0004" ]
23
24 # Sets the distance that the address ways should be from the main way, in feet.
25 address_distance = 30
26
27 # Sets the distance that the ends of the address ways should be pulled back from the ends of the main way, in feet
28 address_pullback = 45
29
30 import sys, os.path, json
31 try:
32     from osgeo import ogr
33     from osgeo import osr
34 except:
35     import ogr
36     import osr
37
38 # https://www.census.gov/geo/reference/codes/cou.html 
39 # tiger_county_fips.json was generated from the following:
40 # wget https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt
41 # cat national_county.txt | perl -F, -naE'($F[0] ne 'AS') && $F[3] =~ s/ ((city|City|County|District|Borough|City and Borough|Municipio|Municipality|Parish|Island|Census Area)(?:, |\Z))+//; say qq(  "$F[1]$F[2]": "$F[3], $F[0]",)'
42 json_fh = open(os.path.dirname(sys.argv[0]) + "/tiger_county_fips.json")
43 county_fips_data = json.load(json_fh)
44
45 def parse_shp_for_geom_and_tags( filename ):
46     #ogr.RegisterAll()
47
48     dr = ogr.GetDriverByName("ESRI Shapefile")
49     poDS = dr.Open( filename )
50
51     if poDS == None:
52         raise "Open failed."
53
54     poLayer = poDS.GetLayer( 0 )
55
56     fieldNameList = []
57     layerDefinition = poLayer.GetLayerDefn()
58     for i in range(layerDefinition.GetFieldCount()):
59         fieldNameList.append(layerDefinition.GetFieldDefn(i).GetName())
60     # sys.stderr.write(",".join(fieldNameList))
61
62     poLayer.ResetReading()
63
64     ret = []
65
66     poFeature = poLayer.GetNextFeature()
67     while poFeature:
68         tags = {}
69         
70         # WAY ID
71         tags["tiger:way_id"] = int( poFeature.GetField("TLID") )
72         
73         # FEATURE IDENTIFICATION
74         mtfcc = poFeature.GetField("MTFCC");
75         if mtfcc != None:
76
77             if mtfcc == "L4010":        #Pipeline
78                 tags["man_made"] = "pipeline"
79             if mtfcc == "L4020":        #Powerline
80                 tags["power"] = "line"
81             if mtfcc == "L4031":        #Aerial Tramway/Ski Lift
82                 tags["aerialway"] = "cable_car"
83             if mtfcc == "L4110":        #Fence Line
84                 tags["barrier"] = "fence"
85             if mtfcc == "L4125":        #Cliff/Escarpment
86                 tags["natural"] = "cliff"
87             if mtfcc == "L4165":        #Ferry Crossing
88                 tags["route"] = "ferry"
89             if mtfcc == "R1011":        #Railroad Feature (Main, Spur, or Yard)
90                 tags["railway"] = "rail"
91                 ttyp = poFeature.GetField("TTYP")
92                 if ttyp != None:
93                     if ttyp == "S":
94                         tags["service"] = "spur"
95                     if ttyp == "Y":
96                         tags["service"] = "yard"
97                     tags["tiger:ttyp"] = ttyp
98             if mtfcc == "R1051":        #Carline, Streetcar Track, Monorail, Other Mass Transit Rail)
99                 tags["railway"] = "light_rail"
100             if mtfcc == "R1052":        #Cog Rail Line, Incline Rail Line, Tram
101                 tags["railway"] = "incline"
102             if mtfcc == "S1100":
103                 tags["highway"] = "primary"
104             if mtfcc == "S1200":
105                 tags["highway"] = "secondary"
106             if mtfcc == "S1400":
107                 tags["highway"] = "residential"
108             if mtfcc == "S1500":
109                 tags["highway"] = "track"
110             if mtfcc == "S1630":        #Ramp
111                 tags["highway"] = "motorway_link"
112             if mtfcc == "S1640":        #Service Drive usually along a limited access highway
113                 tags["highway"] = "service"
114             if mtfcc == "S1710":        #Walkway/Pedestrian Trail
115                 tags["highway"] = "path"
116             if mtfcc == "S1720":
117                 tags["highway"] = "steps"
118             if mtfcc == "S1730":        #Alley
119                 tags["highway"] = "service"
120                 tags["service"] = "alley"
121             if mtfcc == "S1740":        #Private Road for service vehicles (logging, oil, fields, ranches, etc.)
122                 tags["highway"] = "service"
123                 tags["access"] = "private"
124             if mtfcc == "S1750":        #Private Driveway
125                 tags["highway"] = "service"
126                 tags["access"] = "private"
127                 tags["service"] = "driveway"
128             if mtfcc == "S1780":        #Parking Lot Road
129                 tags["highway"] = "service"
130                 tags["service"] = "parking_aisle"
131             if mtfcc == "S1820":        #Bike Path or Trail
132                 tags["highway"] = "cycleway"
133             if mtfcc == "S1830":        #Bridle Path
134                 tags["highway"] = "bridleway"
135             tags["tiger:mtfcc"] = mtfcc
136
137         # FEATURE NAME
138         if poFeature.GetField("FULLNAME"):
139             #capitalizes the first letter of each word
140             name = poFeature.GetField( "FULLNAME" )
141             tags["name"] = name
142
143             #Attempt to guess highway grade
144             if name[0:2] == "I-":
145                 tags["highway"] = "motorway"
146             if name[0:3] == "US ":
147                 tags["highway"] = "primary"
148             if name[0:3] == "US-":
149                 tags["highway"] = "primary"
150             if name[0:3] == "Hwy":
151                 if tags["highway"] != "primary":
152                     tags["highway"] = "secondary"
153
154         # TIGER 2017 no longer contains this field
155         if 'DIVROAD' in fieldNameList:
156             divroad = poFeature.GetField("DIVROAD")
157             if divroad != None:
158                 if divroad == "Y" and "highway" in tags and tags["highway"] == "residential":
159                     tags["highway"] = "tertiary"
160                 tags["tiger:separated"] = divroad
161
162         statefp = poFeature.GetField("STATEFP")
163         countyfp = poFeature.GetField("COUNTYFP")
164         if (statefp != None) and (countyfp != None):
165             county_name = county_fips_data.get(statefp + '' + countyfp)
166             if county_name:
167                 tags["tiger:county"] = county_name
168
169         # tlid = poFeature.GetField("TLID")
170         # if tlid != None:
171         #     tags["tiger:tlid"] = tlid
172
173         lfromadd = poFeature.GetField("LFROMADD")
174         if lfromadd != None:
175             tags["tiger:lfromadd"] = lfromadd
176
177         rfromadd = poFeature.GetField("RFROMADD")
178         if rfromadd != None:
179             tags["tiger:rfromadd"] = rfromadd
180
181         ltoadd = poFeature.GetField("LTOADD")
182         if ltoadd != None:
183             tags["tiger:ltoadd"] = ltoadd
184
185         rtoadd = poFeature.GetField("RTOADD")
186         if rtoadd != None:
187             tags["tiger:rtoadd"] = rtoadd
188
189         zipl = poFeature.GetField("ZIPL")
190         if zipl != None:
191             tags["tiger:zip_left"] = zipl
192
193         zipr = poFeature.GetField("ZIPR")
194         if zipr != None:
195             tags["tiger:zip_right"] = zipr
196
197         if mtfcc not in ignoremtfcc:
198             # COPY DOWN THE GEOMETRY
199             geom = []
200             
201             rawgeom = poFeature.GetGeometryRef()
202             for i in range( rawgeom.GetPointCount() ):
203                 geom.append( (rawgeom.GetX(i), rawgeom.GetY(i)) )
204     
205             ret.append( (geom, tags) )
206         poFeature = poLayer.GetNextFeature()
207         
208     return ret
209
210
211 # ====================================
212 # to do read .prj file for this data
213 # Change the Projcs_wkt to match your datas prj file.
214 # ====================================
215 projcs_wkt = \
216 """GEOGCS["GCS_North_American_1983",
217         DATUM["D_North_American_1983",
218         SPHEROID["GRS_1980",6378137,298.257222101]],
219         PRIMEM["Greenwich",0],
220         UNIT["Degree",0.017453292519943295]]"""
221
222 from_proj = osr.SpatialReference()
223 from_proj.ImportFromWkt( projcs_wkt )
224
225 # output to WGS84
226 to_proj = osr.SpatialReference()
227 to_proj.SetWellKnownGeogCS( "EPSG:4326" )
228
229 tr = osr.CoordinateTransformation( from_proj, to_proj )
230
231 import math
232 def length(segment, nodelist):
233     '''Returns the length (in feet) of a segment'''
234     first = True
235     distance = 0
236     lat_feet = 364613  #The approximate number of feet in one degree of latitude
237     for point in segment:
238         pointid, (lat, lon) = nodelist[ round_point( point ) ]
239         if first:
240             first = False
241         else:
242             #The approximate number of feet in one degree of longitute
243             lrad = math.radians(lat)
244             lon_feet = 365527.822 * math.cos(lrad) - 306.75853 * math.cos(3 * lrad) + 0.3937 * math.cos(5 * lrad)
245             distance += math.sqrt(((lat - previous[0])*lat_feet)**2 + ((lon - previous[1])*lon_feet)**2)
246         previous = (lat, lon)
247     return distance
248
249 def addressways(waylist, nodelist, first_id):
250     id = first_id
251     lat_feet = 364613  #The approximate number of feet in one degree of latitude
252     distance = float(address_distance)
253     ret = []
254
255     for waykey, segments in waylist.items():
256         waykey = dict(waykey)
257         rsegments = []
258         lsegments = []
259         for segment in segments:
260             lsegment = []
261             rsegment = []
262             lastpoint = None
263
264             # Don't pull back the ends of very short ways too much
265             seglength = length(segment, nodelist)
266             if seglength < float(address_pullback) * 3.0:
267                 pullback = seglength / 3.0
268             else:
269                 pullback = float(address_pullback)
270             if "tiger:lfromadd" in waykey:
271                 lfromadd = waykey["tiger:lfromadd"]
272             else:
273                 lfromadd = None
274             if "tiger:ltoadd" in waykey:
275                 ltoadd = waykey["tiger:ltoadd"]
276             else:
277                 ltoadd = None
278             if "tiger:rfromadd" in waykey:
279                 rfromadd = waykey["tiger:rfromadd"]
280             else: 
281                 rfromadd = None
282             if "tiger:rtoadd" in waykey:
283                 rtoadd = waykey["tiger:rtoadd"]
284             else:
285                 rtoadd = None
286             if rfromadd != None and rtoadd != None:
287                 right = True
288             else:
289                 right = False
290             if lfromadd != None and ltoadd != None:
291                 left = True
292             else:
293                 left = False
294             if left or right:
295                 first = True
296                 firstpointid, firstpoint = nodelist[ round_point( segment[0] ) ]
297
298                 finalpointid, finalpoint = nodelist[ round_point( segment[len(segment) - 1] ) ]
299                 for point in segment:
300                     pointid, (lat, lon) = nodelist[ round_point( point ) ]
301
302                     #The approximate number of feet in one degree of longitute
303                     lrad = math.radians(lat)
304                     lon_feet = 365527.822 * math.cos(lrad) - 306.75853 * math.cos(3 * lrad) + 0.3937 * math.cos(5 * lrad)
305
306 #Calculate the points of the offset ways
307                     if lastpoint != None:
308                         #Skip points too close to start
309                         if math.sqrt((lat * lat_feet - firstpoint[0] * lat_feet)**2 + (lon * lon_feet - firstpoint[1] * lon_feet)**2) < pullback:
310                             #Preserve very short ways (but will be rendered backwards)
311                             if pointid != finalpointid:
312                                 continue
313                         #Skip points too close to end
314                         if math.sqrt((lat * lat_feet - finalpoint[0] * lat_feet)**2 + (lon * lon_feet - finalpoint[1] * lon_feet)**2) < pullback:
315                             #Preserve very short ways (but will be rendered backwards)
316                             if (pointid != firstpointid) and (pointid != finalpointid):
317                                 continue
318
319                         X = (lon - lastpoint[1]) * lon_feet
320                         Y = (lat - lastpoint[0]) * lat_feet
321                         if Y != 0:
322                             theta = math.pi/2 - math.atan( X / Y)
323                             Xp = math.sin(theta) * distance
324                             Yp = math.cos(theta) * distance
325                         else:
326                             Xp = 0
327                             if X > 0:
328                                 Yp = -distance
329                             else:
330                                 Yp = distance
331
332                         if Y > 0:
333                             Xp = -Xp
334                         else:
335                             Yp = -Yp
336                                 
337                         if first:
338                             first = False
339                             dX =  - (Yp * (pullback / distance)) / lon_feet #Pull back the first point
340                             dY = (Xp * (pullback / distance)) / lat_feet
341                             if left:
342                                 lpoint = (lastpoint[0] + (Yp / lat_feet) - dY, lastpoint[1] + (Xp / lon_feet) - dX)
343                                 lsegment.append( (id, lpoint) )
344                                 id += 1
345                             if right:
346                                 rpoint = (lastpoint[0] - (Yp / lat_feet) - dY, lastpoint[1] - (Xp / lon_feet) - dX)
347                                 rsegment.append( (id, rpoint) )
348                                 id += 1
349
350                         else:
351                             #round the curves
352                             if delta[1] != 0:
353                                 theta = abs(math.atan(delta[0] / delta[1]))
354                             else:
355                                 theta = math.pi / 2
356                             if Xp != 0:
357                                 theta = theta - abs(math.atan(Yp / Xp))
358                             else: theta = theta - math.pi / 2
359                             r = 1 + abs(math.tan(theta/2))
360                             if left:
361                                 lpoint = (lastpoint[0] + (Yp + delta[0]) * r / (lat_feet * 2), lastpoint[1] + (Xp + delta[1]) * r / (lon_feet * 2))
362                                 lsegment.append( (id, lpoint) )
363                                 id += 1
364                             if right:
365                                 rpoint = (lastpoint[0] - (Yp + delta[0]) * r / (lat_feet * 2), lastpoint[1] - (Xp + delta[1]) * r / (lon_feet * 2))
366                                 
367                                 rsegment.append( (id, rpoint) )
368                                 id += 1
369
370                         delta = (Yp, Xp)
371
372                     lastpoint = (lat, lon)
373
374
375 #Add in the last node
376                 dX =  - (Yp * (pullback / distance)) / lon_feet
377                 dY = (Xp * (pullback / distance)) / lat_feet
378                 if left:
379                     lpoint = (lastpoint[0] + (Yp + delta[0]) / (lat_feet * 2) + dY, lastpoint[1] + (Xp + delta[1]) / (lon_feet * 2) + dX )
380                     lsegment.append( (id, lpoint) )
381                     id += 1
382                 if right:
383                     rpoint = (lastpoint[0] - Yp / lat_feet + dY, lastpoint[1] - Xp / lon_feet + dX)
384                     rsegment.append( (id, rpoint) )
385                     id += 1
386
387 #Generate the tags for ways and nodes
388                 zipr = ''
389                 zipl = ''
390                 name = ''
391                 county = ''
392                 if "tiger:zip_right" in waykey:
393                     zipr = waykey["tiger:zip_right"]
394                 if "tiger:zip_left" in waykey:
395                     zipl = waykey["tiger:zip_left"]
396                 if "name" in waykey:
397                     name = waykey["name"]
398                 if "tiger:county" in waykey:
399                     county = waykey["tiger:county"]
400                 if "tiger:separated" in waykey: # No longer set in Tiger-2017
401                     separated = waykey["tiger:separated"]
402                 else:
403                     separated = "N"
404
405 #Write the nodes of the offset ways
406                 if right:
407                     rlinestring = [];
408                     for i, point in rsegment:
409                         rlinestring.append( "%f %f" % (point[1], point[0]) )
410                 if left:
411                     llinestring = [];
412                     for i, point in lsegment:
413                         llinestring.append( "%f %f" % (point[1], point[0]) )
414                 if right:
415                     rsegments.append( rsegment )
416                 if left:
417                     lsegments.append( lsegment )
418                 rtofromint = right        #Do the addresses convert to integers?
419                 ltofromint = left        #Do the addresses convert to integers?
420                 if right:
421                     try: rfromint = int(rfromadd)
422                     except:
423                         print("Non integer address: %s" % rfromadd)
424                         rtofromint = False
425                     try: rtoint = int(rtoadd)
426                     except:
427                         print("Non integer address: %s" % rtoadd)
428                         rtofromint = False
429                 if left:
430                     try: lfromint = int(lfromadd)
431                     except:
432                         print("Non integer address: %s" % lfromadd)
433                         ltofromint = False
434                     try: ltoint = int(ltoadd)
435                     except:
436                         print("Non integer address: %s" % ltoadd)
437                         ltofromint = False
438                 if right:
439                     id += 1
440
441                     interpolationtype = "all";
442                     if rtofromint:
443                         if (rfromint % 2) == 0 and (rtoint % 2) == 0:
444                             if separated == "Y":        #Doesn't matter if there is another side
445                                 interpolationtype = "even";
446                             elif ltofromint and (lfromint % 2) == 1 and (ltoint % 2) == 1:
447                                 interpolationtype = "even";
448                         elif (rfromint % 2) == 1 and (rtoint % 2) == 1:
449                             if separated == "Y":        #Doesn't matter if there is another side
450                                 interpolationtype = "odd";
451                             elif ltofromint and (lfromint % 2) == 0 and (ltoint % 2) == 0:
452                                 interpolationtype = "odd";
453
454                     ret.append( "SELECT tiger_line_import(ST_GeomFromText('LINESTRING(%s)',4326), %s, %s, %s, %s, %s, %s);" %
455                                 ( ",".join(rlinestring), sql_quote(rfromadd), sql_quote(rtoadd), sql_quote(interpolationtype), sql_quote(name), sql_quote(county), sql_quote(zipr) ) )
456
457                 if left:
458                     id += 1
459
460                     interpolationtype = "all";
461                     if ltofromint:
462                         if (lfromint % 2) == 0 and (ltoint % 2) == 0:
463                             if separated == "Y":
464                                 interpolationtype = "even";
465                             elif rtofromint and (rfromint % 2) == 1 and (rtoint % 2) == 1:
466                                 interpolationtype = "even";
467                         elif (lfromint % 2) == 1 and (ltoint % 2) == 1:
468                             if separated == "Y":
469                                 interpolationtype = "odd";
470                             elif rtofromint and (rfromint %2 ) == 0 and (rtoint % 2) == 0:
471                                 interpolationtype = "odd";
472
473                     ret.append( "SELECT tiger_line_import(ST_GeomFromText('LINESTRING(%s)',4326), %s, %s, %s, %s, %s, %s);" %
474                                 ( ",".join(llinestring), sql_quote(lfromadd), sql_quote(ltoadd), sql_quote(interpolationtype), sql_quote(name), sql_quote(county), sql_quote(zipl) ) )
475
476     return ret
477
478 def sql_quote( string ):
479     return "'" + string.replace("'", "''") + "'"
480
481 def unproject( point ):
482     pt = tr.TransformPoint( point[0], point[1] )
483     return (pt[1], pt[0])
484
485 def round_point( point, accuracy=8 ):
486     return tuple( [ round(x,accuracy) for x in point ] )
487
488 def compile_nodelist( parsed_gisdata, first_id=1 ):
489     nodelist = {}
490     
491     i = first_id
492     for geom, tags in parsed_gisdata:
493         if len( geom )==0:
494             continue
495         
496         for point in geom:
497             r_point = round_point( point )
498             if r_point not in nodelist:
499                 nodelist[ r_point ] = (i, unproject( point ))
500                 i += 1
501             
502     return (i, nodelist)
503
504 def adjacent( left, right ):
505     left_left = round_point(left[0])
506     left_right = round_point(left[-1])
507     right_left = round_point(right[0])
508     right_right = round_point(right[-1])
509     
510     return ( left_left == right_left or
511              left_left == right_right or
512              left_right == right_left or
513              left_right == right_right )
514              
515 def glom( left, right ):
516     left = list( left )
517     right = list( right )
518     
519     left_left = round_point(left[0])
520     left_right = round_point(left[-1])
521     right_left = round_point(right[0])
522     right_right = round_point(right[-1])
523     
524     if left_left == right_left:
525         left.reverse()
526         return left[0:-1] + right
527         
528     if left_left == right_right:
529         return right[0:-1] + left
530         
531     if left_right == right_left:
532         return left[0:-1] + right
533         
534     if left_right == right_right:
535         right.reverse()
536         return left[0:-1] + right
537         
538     raise 'segments are not adjacent'
539
540 def glom_once( segments ):
541     if len(segments)==0:
542         return segments
543     
544     unsorted = list( segments )
545     x = unsorted.pop(0)
546     
547     while len( unsorted ) > 0:
548         n = len( unsorted )
549         
550         for i in range(0, n):
551             y = unsorted[i]
552             if adjacent( x, y ):
553                 y = unsorted.pop(i)
554                 x = glom( x, y )
555                 break
556                 
557         # Sorted and unsorted lists have no adjacent segments
558         if len( unsorted ) == n:
559             break
560             
561     return x, unsorted
562     
563 def glom_all( segments ):
564     unsorted = segments
565     chunks = []
566     
567     while unsorted != []:
568         chunk, unsorted = glom_once( unsorted )
569         chunks.append( chunk )
570         
571     return chunks
572         
573                 
574
575 def compile_waylist( parsed_gisdata ):
576     waylist = {}
577     
578     #Group by tiger:way_id
579     for geom, tags in parsed_gisdata:
580         way_key = tags.copy()
581         way_key = ( way_key['tiger:way_id'], tuple( [(k,v) for k,v in way_key.items()] ) )
582         
583         if way_key not in waylist:
584             waylist[way_key] = []
585             
586         waylist[way_key].append( geom )
587     
588     ret = {}
589     for (way_id, way_key), segments in waylist.items():
590         ret[way_key] = glom_all( segments )
591     return ret
592             
593
594 def shape_to_sql( shp_filename, sql_filename ):
595     
596     print("parsing shpfile %s" % shp_filename)
597     parsed_features = parse_shp_for_geom_and_tags( shp_filename )
598     
599     print("compiling nodelist")
600     i, nodelist = compile_nodelist( parsed_features )
601     
602     print("compiling waylist")
603     waylist = compile_waylist( parsed_features )
604
605     print("preparing address ways")
606     sql_lines = addressways(waylist, nodelist, i)
607
608     print("writing %s" % sql_filename)
609     fp = open( sql_filename, "w" )
610     fp.write( "\n".join( sql_lines ) )
611     fp.close()
612     
613 if __name__ == '__main__':
614     import sys, os.path
615     if len(sys.argv) < 3:
616         print("%s input.shp output.sql" % sys.argv[0])
617         sys.exit()
618     shp_filename = sys.argv[1]
619     sql_filename = sys.argv[2]
620     shape_to_sql(shp_filename, sql_filename)