]> git.openstreetmap.org Git - nominatim.git/blob - utils/tigerAddressImport.py
ignore linked places for address details
[nominatim.git] / utils / tigerAddressImport.py
1 #!/usr/bin/python
2 # Tiger road data to OSM conversion script
3 # Creates Karlsruhe-style address ways beside the main way
4 # based on the Massachusetts GIS script by christopher schmidt
5
6 #BUGS:
7 # On very tight curves, a loop may be generated in the address way.
8 # It would be nice if the ends of the address ways were not pulled back from dead ends
9
10
11 # Ways that include these mtfccs should not be uploaded
12 # H1100 Connector
13 # H3010 Stream/River
14 # H3013 Braided Stream
15 # H3020 Canal, Ditch or Aqueduct
16 # L4130 Point-to-Point Line
17 # L4140 Property/Parcel Line (Including PLSS)
18 # P0001 Nonvisible Linear Legal/Statistical Boundary
19 # P0002 Perennial Shoreline
20 # P0003 Intermittent Shoreline
21 # P0004 Other non-visible bounding Edge (e.g., Census water boundary, boundary of an areal feature)
22 ignoremtfcc = [ "H1100", "H3010", "H3013", "H3020", "L4130", "L4140", "P0001", "P0002", "P0003", "P0004" ]
23
24 # Sets the distance that the address ways should be from the main way, in feet.
25 address_distance = 30
26
27 # Sets the distance that the ends of the address ways should be pulled back from the ends of the main way, in feet
28 address_pullback = 45
29
30 import sys, os.path, json
31 try:
32     from osgeo import ogr
33     from osgeo import osr
34 except:
35     import ogr
36     import osr
37
38 # tiger_county_fips was generated from the following:
39 # wget http://www.census.gov/datamap/fipslist/AllSt.txt
40 # cat AllSt.txt  | grep '^                [0-9]' | awk "{printf \"'%s' : '%s' ,\\n\", \$1, substr(\$0, 31)}" | > countyfips.py
41 json_fh = open(os.path.dirname(sys.argv[0]) + "/tiger_county_fips.json")
42 county_fips_data = json.load(json_fh)
43
44 def parse_shp_for_geom_and_tags( filename ):
45     #ogr.RegisterAll()
46
47     dr = ogr.GetDriverByName("ESRI Shapefile")
48     poDS = dr.Open( filename )
49
50     if poDS == None:
51         raise "Open failed."
52
53     poLayer = poDS.GetLayer( 0 )
54
55     fieldNameList = []
56     layerDefinition = poLayer.GetLayerDefn()
57     for i in range(layerDefinition.GetFieldCount()):
58         fieldNameList.append(layerDefinition.GetFieldDefn(i).GetName())
59     # sys.stderr.write(",".join(fieldNameList))
60
61     poLayer.ResetReading()
62
63     ret = []
64
65     poFeature = poLayer.GetNextFeature()
66     while poFeature:
67         tags = {}
68         
69         # WAY ID
70         tags["tiger:way_id"] = int( poFeature.GetField("TLID") )
71         
72         # FEATURE IDENTIFICATION
73         mtfcc = poFeature.GetField("MTFCC");
74         if mtfcc != None:
75
76             if mtfcc == "L4010":        #Pipeline
77                 tags["man_made"] = "pipeline"
78             if mtfcc == "L4020":        #Powerline
79                 tags["power"] = "line"
80             if mtfcc == "L4031":        #Aerial Tramway/Ski Lift
81                 tags["aerialway"] = "cable_car"
82             if mtfcc == "L4110":        #Fence Line
83                 tags["barrier"] = "fence"
84             if mtfcc == "L4125":        #Cliff/Escarpment
85                 tags["natural"] = "cliff"
86             if mtfcc == "L4165":        #Ferry Crossing
87                 tags["route"] = "ferry"
88             if mtfcc == "R1011":        #Railroad Feature (Main, Spur, or Yard)
89                 tags["railway"] = "rail"
90                 ttyp = poFeature.GetField("TTYP")
91                 if ttyp != None:
92                     if ttyp == "S":
93                         tags["service"] = "spur"
94                     if ttyp == "Y":
95                         tags["service"] = "yard"
96                     tags["tiger:ttyp"] = ttyp
97             if mtfcc == "R1051":        #Carline, Streetcar Track, Monorail, Other Mass Transit Rail)
98                 tags["railway"] = "light_rail"
99             if mtfcc == "R1052":        #Cog Rail Line, Incline Rail Line, Tram
100                 tags["railway"] = "incline"
101             if mtfcc == "S1100":
102                 tags["highway"] = "primary"
103             if mtfcc == "S1200":
104                 tags["highway"] = "secondary"
105             if mtfcc == "S1400":
106                 tags["highway"] = "residential"
107             if mtfcc == "S1500":
108                 tags["highway"] = "track"
109             if mtfcc == "S1630":        #Ramp
110                 tags["highway"] = "motorway_link"
111             if mtfcc == "S1640":        #Service Drive usually along a limited access highway
112                 tags["highway"] = "service"
113             if mtfcc == "S1710":        #Walkway/Pedestrian Trail
114                 tags["highway"] = "path"
115             if mtfcc == "S1720":
116                 tags["highway"] = "steps"
117             if mtfcc == "S1730":        #Alley
118                 tags["highway"] = "service"
119                 tags["service"] = "alley"
120             if mtfcc == "S1740":        #Private Road for service vehicles (logging, oil, fields, ranches, etc.)
121                 tags["highway"] = "service"
122                 tags["access"] = "private"
123             if mtfcc == "S1750":        #Private Driveway
124                 tags["highway"] = "service"
125                 tags["access"] = "private"
126                 tags["service"] = "driveway"
127             if mtfcc == "S1780":        #Parking Lot Road
128                 tags["highway"] = "service"
129                 tags["service"] = "parking_aisle"
130             if mtfcc == "S1820":        #Bike Path or Trail
131                 tags["highway"] = "cycleway"
132             if mtfcc == "S1830":        #Bridle Path
133                 tags["highway"] = "bridleway"
134             tags["tiger:mtfcc"] = mtfcc
135
136         # FEATURE NAME
137         if poFeature.GetField("FULLNAME"):
138             #capitalizes the first letter of each word
139             name = poFeature.GetField( "FULLNAME" )
140             tags["name"] = name
141
142             #Attempt to guess highway grade
143             if name[0:2] == "I-":
144                 tags["highway"] = "motorway"
145             if name[0:3] == "US ":
146                 tags["highway"] = "primary"
147             if name[0:3] == "US-":
148                 tags["highway"] = "primary"
149             if name[0:3] == "Hwy":
150                 if tags["highway"] != "primary":
151                     tags["highway"] = "secondary"
152
153         # TIGER 2017 no longer contains this field
154         if 'DIVROAD' in fieldNameList:
155             divroad = poFeature.GetField("DIVROAD")
156             if divroad != None:
157                 if divroad == "Y" and "highway" in tags and tags["highway"] == "residential":
158                     tags["highway"] = "tertiary"
159                 tags["tiger:separated"] = divroad
160
161         statefp = poFeature.GetField("STATEFP")
162         countyfp = poFeature.GetField("COUNTYFP")
163         if (statefp != None) and (countyfp != None):
164             county_name = county_fips_data.get(statefp + '' + countyfp)
165             if county_name:
166                 tags["tiger:county"] = county_name.encode("utf-8")
167
168         # tlid = poFeature.GetField("TLID")
169         # if tlid != None:
170         #     tags["tiger:tlid"] = tlid
171
172         lfromadd = poFeature.GetField("LFROMADD")
173         if lfromadd != None:
174             tags["tiger:lfromadd"] = lfromadd
175
176         rfromadd = poFeature.GetField("RFROMADD")
177         if rfromadd != None:
178             tags["tiger:rfromadd"] = rfromadd
179
180         ltoadd = poFeature.GetField("LTOADD")
181         if ltoadd != None:
182             tags["tiger:ltoadd"] = ltoadd
183
184         rtoadd = poFeature.GetField("RTOADD")
185         if rtoadd != None:
186             tags["tiger:rtoadd"] = rtoadd
187
188         zipl = poFeature.GetField("ZIPL")
189         if zipl != None:
190             tags["tiger:zip_left"] = zipl
191
192         zipr = poFeature.GetField("ZIPR")
193         if zipr != None:
194             tags["tiger:zip_right"] = zipr
195
196         if mtfcc not in ignoremtfcc:
197             # COPY DOWN THE GEOMETRY
198             geom = []
199             
200             rawgeom = poFeature.GetGeometryRef()
201             for i in range( rawgeom.GetPointCount() ):
202                 geom.append( (rawgeom.GetX(i), rawgeom.GetY(i)) )
203     
204             ret.append( (geom, tags) )
205         poFeature = poLayer.GetNextFeature()
206         
207     return ret
208
209
210 # ====================================
211 # to do read .prj file for this data
212 # Change the Projcs_wkt to match your datas prj file.
213 # ====================================
214 projcs_wkt = \
215 """GEOGCS["GCS_North_American_1983",
216         DATUM["D_North_American_1983",
217         SPHEROID["GRS_1980",6378137,298.257222101]],
218         PRIMEM["Greenwich",0],
219         UNIT["Degree",0.017453292519943295]]"""
220
221 from_proj = osr.SpatialReference()
222 from_proj.ImportFromWkt( projcs_wkt )
223
224 # output to WGS84
225 to_proj = osr.SpatialReference()
226 to_proj.SetWellKnownGeogCS( "EPSG:4326" )
227
228 tr = osr.CoordinateTransformation( from_proj, to_proj )
229
230 import math
231 def length(segment, nodelist):
232     '''Returns the length (in feet) of a segment'''
233     first = True
234     distance = 0
235     lat_feet = 364613  #The approximate number of feet in one degree of latitude
236     for point in segment:
237         pointid, (lat, lon) = nodelist[ round_point( point ) ]
238         if first:
239             first = False
240         else:
241             #The approximate number of feet in one degree of longitute
242             lrad = math.radians(lat)
243             lon_feet = 365527.822 * math.cos(lrad) - 306.75853 * math.cos(3 * lrad) + 0.3937 * math.cos(5 * lrad)
244             distance += math.sqrt(((lat - previous[0])*lat_feet)**2 + ((lon - previous[1])*lon_feet)**2)
245         previous = (lat, lon)
246     return distance
247
248 def addressways(waylist, nodelist, first_id):
249     id = first_id
250     lat_feet = 364613  #The approximate number of feet in one degree of latitude
251     distance = float(address_distance)
252     ret = []
253
254     for waykey, segments in waylist.items():
255         waykey = dict(waykey)
256         rsegments = []
257         lsegments = []
258         for segment in segments:
259             lsegment = []
260             rsegment = []
261             lastpoint = None
262
263             # Don't pull back the ends of very short ways too much
264             seglength = length(segment, nodelist)
265             if seglength < float(address_pullback) * 3.0:
266                 pullback = seglength / 3.0
267             else:
268                 pullback = float(address_pullback)
269             if "tiger:lfromadd" in waykey:
270                 lfromadd = waykey["tiger:lfromadd"]
271             else:
272                 lfromadd = None
273             if "tiger:ltoadd" in waykey:
274                 ltoadd = waykey["tiger:ltoadd"]
275             else:
276                 ltoadd = None
277             if "tiger:rfromadd" in waykey:
278                 rfromadd = waykey["tiger:rfromadd"]
279             else: 
280                 rfromadd = None
281             if "tiger:rtoadd" in waykey:
282                 rtoadd = waykey["tiger:rtoadd"]
283             else:
284                 rtoadd = None
285             if rfromadd != None and rtoadd != None:
286                 right = True
287             else:
288                 right = False
289             if lfromadd != None and ltoadd != None:
290                 left = True
291             else:
292                 left = False
293             if left or right:
294                 first = True
295                 firstpointid, firstpoint = nodelist[ round_point( segment[0] ) ]
296
297                 finalpointid, finalpoint = nodelist[ round_point( segment[len(segment) - 1] ) ]
298                 for point in segment:
299                     pointid, (lat, lon) = nodelist[ round_point( point ) ]
300
301                     #The approximate number of feet in one degree of longitute
302                     lrad = math.radians(lat)
303                     lon_feet = 365527.822 * math.cos(lrad) - 306.75853 * math.cos(3 * lrad) + 0.3937 * math.cos(5 * lrad)
304
305 #Calculate the points of the offset ways
306                     if lastpoint != None:
307                         #Skip points too close to start
308                         if math.sqrt((lat * lat_feet - firstpoint[0] * lat_feet)**2 + (lon * lon_feet - firstpoint[1] * lon_feet)**2) < pullback:
309                             #Preserve very short ways (but will be rendered backwards)
310                             if pointid != finalpointid:
311                                 continue
312                         #Skip points too close to end
313                         if math.sqrt((lat * lat_feet - finalpoint[0] * lat_feet)**2 + (lon * lon_feet - finalpoint[1] * lon_feet)**2) < pullback:
314                             #Preserve very short ways (but will be rendered backwards)
315                             if (pointid != firstpointid) and (pointid != finalpointid):
316                                 continue
317
318                         X = (lon - lastpoint[1]) * lon_feet
319                         Y = (lat - lastpoint[0]) * lat_feet
320                         if Y != 0:
321                             theta = math.pi/2 - math.atan( X / Y)
322                             Xp = math.sin(theta) * distance
323                             Yp = math.cos(theta) * distance
324                         else:
325                             Xp = 0
326                             if X > 0:
327                                 Yp = -distance
328                             else:
329                                 Yp = distance
330
331                         if Y > 0:
332                             Xp = -Xp
333                         else:
334                             Yp = -Yp
335                                 
336                         if first:
337                             first = False
338                             dX =  - (Yp * (pullback / distance)) / lon_feet #Pull back the first point
339                             dY = (Xp * (pullback / distance)) / lat_feet
340                             if left:
341                                 lpoint = (lastpoint[0] + (Yp / lat_feet) - dY, lastpoint[1] + (Xp / lon_feet) - dX)
342                                 lsegment.append( (id, lpoint) )
343                                 id += 1
344                             if right:
345                                 rpoint = (lastpoint[0] - (Yp / lat_feet) - dY, lastpoint[1] - (Xp / lon_feet) - dX)
346                                 rsegment.append( (id, rpoint) )
347                                 id += 1
348
349                         else:
350                             #round the curves
351                             if delta[1] != 0:
352                                 theta = abs(math.atan(delta[0] / delta[1]))
353                             else:
354                                 theta = math.pi / 2
355                             if Xp != 0:
356                                 theta = theta - abs(math.atan(Yp / Xp))
357                             else: theta = theta - math.pi / 2
358                             r = 1 + abs(math.tan(theta/2))
359                             if left:
360                                 lpoint = (lastpoint[0] + (Yp + delta[0]) * r / (lat_feet * 2), lastpoint[1] + (Xp + delta[1]) * r / (lon_feet * 2))
361                                 lsegment.append( (id, lpoint) )
362                                 id += 1
363                             if right:
364                                 rpoint = (lastpoint[0] - (Yp + delta[0]) * r / (lat_feet * 2), lastpoint[1] - (Xp + delta[1]) * r / (lon_feet * 2))
365                                 
366                                 rsegment.append( (id, rpoint) )
367                                 id += 1
368
369                         delta = (Yp, Xp)
370
371                     lastpoint = (lat, lon)
372
373
374 #Add in the last node
375                 dX =  - (Yp * (pullback / distance)) / lon_feet
376                 dY = (Xp * (pullback / distance)) / lat_feet
377                 if left:
378                     lpoint = (lastpoint[0] + (Yp + delta[0]) / (lat_feet * 2) + dY, lastpoint[1] + (Xp + delta[1]) / (lon_feet * 2) + dX )
379                     lsegment.append( (id, lpoint) )
380                     id += 1
381                 if right:
382                     rpoint = (lastpoint[0] - Yp / lat_feet + dY, lastpoint[1] - Xp / lon_feet + dX)
383                     rsegment.append( (id, rpoint) )
384                     id += 1
385
386 #Generate the tags for ways and nodes
387                 zipr = ''
388                 zipl = ''
389                 name = ''
390                 county = ''
391                 if "tiger:zip_right" in waykey:
392                     zipr = waykey["tiger:zip_right"]
393                 if "tiger:zip_left" in waykey:
394                     zipl = waykey["tiger:zip_left"]
395                 if "name" in waykey:
396                     name = waykey["name"]
397                 if "tiger:county" in waykey:
398                     county = waykey["tiger:county"]
399                 if "tiger:separated" in waykey: # No longer set in Tiger-2017
400                     separated = waykey["tiger:separated"]
401                 else:
402                     separated = "N"
403
404 #Write the nodes of the offset ways
405                 if right:
406                     rlinestring = [];
407                     for i, point in rsegment:
408                         rlinestring.append( "%f %f" % (point[1], point[0]) )
409                 if left:
410                     llinestring = [];
411                     for i, point in lsegment:
412                         llinestring.append( "%f %f" % (point[1], point[0]) )
413                 if right:
414                     rsegments.append( rsegment )
415                 if left:
416                     lsegments.append( lsegment )
417                 rtofromint = right        #Do the addresses convert to integers?
418                 ltofromint = left        #Do the addresses convert to integers?
419                 if right:
420                     try: rfromint = int(rfromadd)
421                     except:
422                         print("Non integer address: %s" % rfromadd)
423                         rtofromint = False
424                     try: rtoint = int(rtoadd)
425                     except:
426                         print("Non integer address: %s" % rtoadd)
427                         rtofromint = False
428                 if left:
429                     try: lfromint = int(lfromadd)
430                     except:
431                         print("Non integer address: %s" % lfromadd)
432                         ltofromint = False
433                     try: ltoint = int(ltoadd)
434                     except:
435                         print("Non integer address: %s" % ltoadd)
436                         ltofromint = False
437                 if right:
438                     id += 1
439
440                     interpolationtype = "all";
441                     if rtofromint:
442                         if (rfromint % 2) == 0 and (rtoint % 2) == 0:
443                             if separated == "Y":        #Doesn't matter if there is another side
444                                 interpolationtype = "even";
445                             elif ltofromint and (lfromint % 2) == 1 and (ltoint % 2) == 1:
446                                 interpolationtype = "even";
447                         elif (rfromint % 2) == 1 and (rtoint % 2) == 1:
448                             if separated == "Y":        #Doesn't matter if there is another side
449                                 interpolationtype = "odd";
450                             elif ltofromint and (lfromint % 2) == 0 and (ltoint % 2) == 0:
451                                 interpolationtype = "odd";
452
453                     ret.append( "SELECT tiger_line_import(ST_GeomFromText('LINESTRING(%s)',4326), %s, %s, %s, %s, %s, %s);" %
454                                 ( ",".join(rlinestring), sql_quote(rfromadd), sql_quote(rtoadd), sql_quote(interpolationtype), sql_quote(name), sql_quote(county), sql_quote(zipr) ) )
455
456                 if left:
457                     id += 1
458
459                     interpolationtype = "all";
460                     if ltofromint:
461                         if (lfromint % 2) == 0 and (ltoint % 2) == 0:
462                             if separated == "Y":
463                                 interpolationtype = "even";
464                             elif rtofromint and (rfromint % 2) == 1 and (rtoint % 2) == 1:
465                                 interpolationtype = "even";
466                         elif (lfromint % 2) == 1 and (ltoint % 2) == 1:
467                             if separated == "Y":
468                                 interpolationtype = "odd";
469                             elif rtofromint and (rfromint %2 ) == 0 and (rtoint % 2) == 0:
470                                 interpolationtype = "odd";
471
472                     ret.append( "SELECT tiger_line_import(ST_GeomFromText('LINESTRING(%s)',4326), %s, %s, %s, %s, %s, %s);" %
473                                 ( ",".join(llinestring), sql_quote(lfromadd), sql_quote(ltoadd), sql_quote(interpolationtype), sql_quote(name), sql_quote(county), sql_quote(zipl) ) )
474
475     return ret
476
477 def sql_quote( string ):
478     return "'" + string.replace("'", "''") + "'"
479
480 def unproject( point ):
481     pt = tr.TransformPoint( point[0], point[1] )
482     return (pt[1], pt[0])
483
484 def round_point( point, accuracy=8 ):
485     return tuple( [ round(x,accuracy) for x in point ] )
486
487 def compile_nodelist( parsed_gisdata, first_id=1 ):
488     nodelist = {}
489     
490     i = first_id
491     for geom, tags in parsed_gisdata:
492         if len( geom )==0:
493             continue
494         
495         for point in geom:
496             r_point = round_point( point )
497             if r_point not in nodelist:
498                 nodelist[ r_point ] = (i, unproject( point ))
499                 i += 1
500             
501     return (i, nodelist)
502
503 def adjacent( left, right ):
504     left_left = round_point(left[0])
505     left_right = round_point(left[-1])
506     right_left = round_point(right[0])
507     right_right = round_point(right[-1])
508     
509     return ( left_left == right_left or
510              left_left == right_right or
511              left_right == right_left or
512              left_right == right_right )
513              
514 def glom( left, right ):
515     left = list( left )
516     right = list( right )
517     
518     left_left = round_point(left[0])
519     left_right = round_point(left[-1])
520     right_left = round_point(right[0])
521     right_right = round_point(right[-1])
522     
523     if left_left == right_left:
524         left.reverse()
525         return left[0:-1] + right
526         
527     if left_left == right_right:
528         return right[0:-1] + left
529         
530     if left_right == right_left:
531         return left[0:-1] + right
532         
533     if left_right == right_right:
534         right.reverse()
535         return left[0:-1] + right
536         
537     raise 'segments are not adjacent'
538
539 def glom_once( segments ):
540     if len(segments)==0:
541         return segments
542     
543     unsorted = list( segments )
544     x = unsorted.pop(0)
545     
546     while len( unsorted ) > 0:
547         n = len( unsorted )
548         
549         for i in range(0, n):
550             y = unsorted[i]
551             if adjacent( x, y ):
552                 y = unsorted.pop(i)
553                 x = glom( x, y )
554                 break
555                 
556         # Sorted and unsorted lists have no adjacent segments
557         if len( unsorted ) == n:
558             break
559             
560     return x, unsorted
561     
562 def glom_all( segments ):
563     unsorted = segments
564     chunks = []
565     
566     while unsorted != []:
567         chunk, unsorted = glom_once( unsorted )
568         chunks.append( chunk )
569         
570     return chunks
571         
572                 
573
574 def compile_waylist( parsed_gisdata ):
575     waylist = {}
576     
577     #Group by tiger:way_id
578     for geom, tags in parsed_gisdata:
579         way_key = tags.copy()
580         way_key = ( way_key['tiger:way_id'], tuple( [(k,v) for k,v in way_key.items()] ) )
581         
582         if way_key not in waylist:
583             waylist[way_key] = []
584             
585         waylist[way_key].append( geom )
586     
587     ret = {}
588     for (way_id, way_key), segments in waylist.items():
589         ret[way_key] = glom_all( segments )
590     return ret
591             
592
593 def shape_to_sql( shp_filename, sql_filename ):
594     
595     print("parsing shpfile %s" % shp_filename)
596     parsed_features = parse_shp_for_geom_and_tags( shp_filename )
597     
598     print("compiling nodelist")
599     i, nodelist = compile_nodelist( parsed_features )
600     
601     print("compiling waylist")
602     waylist = compile_waylist( parsed_features )
603
604     print("preparing address ways")
605     sql_lines = addressways(waylist, nodelist, i)
606
607     print("writing %s" % sql_filename)
608     fp = open( sql_filename, "w" )
609     fp.write( "\n".join( sql_lines ) )
610     fp.close()
611     
612 if __name__ == '__main__':
613     import sys, os.path
614     if len(sys.argv) < 3:
615         print("%s input.shp output.sql" % sys.argv[0])
616         sys.exit()
617     shp_filename = sys.argv[1]
618     sql_filename = sys.argv[2]
619     shape_to_sql(shp_filename, sql_filename)