git.openstreetmap.org Git - nominatim.git/commitdiff
initial flex import scripts
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 3 Nov 2022 16:15:01 +0000 (17:15 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 10 Nov 2022 08:37:38 +0000 (09:37 +0100)
Only implements the extratags style for the moment. Tests pass
for the same behaviour as the gazetteer output. Updates still need
to be done.

CMakeLists.txt
nominatim/clicmd/args.py
nominatim/tools/exec_utils.py
settings/flex-base.lua [new file with mode: 0644]
settings/import-extratags.lua [new file with mode: 0644]
test/bdd/osm2pgsql/import/tags.feature
test/bdd/steps/steps_osm_data.py

index 036dda31b03418d575a763ae88705646bfd9a240..f5f776a171a0ddf9d6d8d6fcd9ed99a16cee7da9 100644 (file)
@@ -63,7 +63,6 @@ if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
     endif()
     set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
     set(BUILD_TESTS off)
     endif()
     set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
     set(BUILD_TESTS off)
-    set(WITH_LUA off CACHE BOOL "")
     add_subdirectory(osm2pgsql)
     set(BUILD_TESTS ${BUILD_TESTS_SAVED})
 endif()
     add_subdirectory(osm2pgsql)
     set(BUILD_TESTS ${BUILD_TESTS_SAVED})
 endif()
index 2f8273d602890be4971d59ac5495f7a54858cd48..b120ee7364f5a686d2e34563745274a4456ccd8e 100644 (file)
@@ -184,6 +184,7 @@ class NominatimArgs:
         return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path,
                     osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
                     osm2pgsql_style=self.config.get_import_style_file(),
         return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path,
                     osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
                     osm2pgsql_style=self.config.get_import_style_file(),
+                    osm2pgsql_style_path=self.config.config_dir,
                     threads=self.threads or default_threads,
                     dsn=self.config.get_libpq_dsn(),
                     flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''),
                     threads=self.threads or default_threads,
                     dsn=self.config.get_libpq_dsn(),
                     flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''),
index 610e2182d5436b8723b0bc3ebe2cb24fcd672f2c..675e070b3f30d00d8d9149c88454d2e61795279c 100644 (file)
@@ -10,6 +10,7 @@ Helper functions for executing external programs.
 from typing import Any, Union, Optional, Mapping, IO
 from pathlib import Path
 import logging
 from typing import Any, Union, Optional, Mapping, IO
 from pathlib import Path
 import logging
+import os
 import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode
 import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode
@@ -120,9 +121,16 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
            '--log-progress', 'true',
            '--number-processes', str(options['threads']),
            '--cache', str(options['osm2pgsql_cache']),
            '--log-progress', 'true',
            '--number-processes', str(options['threads']),
            '--cache', str(options['osm2pgsql_cache']),
-           '--output', 'gazetteer',
            '--style', str(options['osm2pgsql_style'])
           ]
            '--style', str(options['osm2pgsql_style'])
           ]
+
+    if str(options['osm2pgsql_style']).endswith('.lua'):
+        env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / 'flex-base.lua'),
+                                    os.environ.get('LUAPATH', ';')))
+        cmd.extend(('--output', 'flex'))
+    else:
+        cmd.extend(('--output', 'gazetteer'))
+
     if options['append']:
         cmd.append('--append')
     else:
     if options['append']:
         cmd.append('--append')
     else:
diff --git a/settings/flex-base.lua b/settings/flex-base.lua
new file mode 100644 (file)
index 0000000..d91299a
--- /dev/null
@@ -0,0 +1,382 @@
+-- Core functions for Nominatim import flex style.
+--
+
+
+-- The single place table.
+-- Kept in the global `place_table`; Place:write_row() inserts one row
+-- into it for every main tag that is accepted for an object.
+place_table = osm2pgsql.define_table{
+    name = "place",
+    -- Objects are identified by the osm_type/osm_id column pair.
+    ids = { type = 'any', id_column = 'osm_id', type_column = 'osm_type' },
+    columns = {
+        -- class/type receive the key and value of the main tag.
+        { column = 'class', type = 'text', not_null = true },
+        { column = 'type', type = 'text', not_null = true },
+        { column = 'admin_level', type = 'smallint' },
+        -- The hstore columns are filled from Place.names, Place.address
+        -- and Place.extratags; write_row() passes no table when empty.
+        { column = 'name', type = 'hstore' },
+        { column = 'address', type = 'hstore' },
+        { column = 'extratags', type = 'hstore' },
+        { column = 'geometry', type = 'geometry', projection = 'WGS84', not_null = true },
+    }
+}
+
+------------- Place class ------------------------------------------
+
+local Place = {}
+Place.__index = Place
+
+-- Create a Place wrapper around an OSM object.
+--
+--   object    - the OSM object; must offer grab_tag() and a tags table
+--   geom_func - function(object) producing the geometry; it is only
+--               called once a row is actually written
+--
+-- The admin_level tag is taken from the object via grab_tag() and
+-- normalised: any value that is not a whole number in 1..15 becomes 15.
+function Place.new(object, geom_func)
+    local level = tonumber(object:grab_tag('admin_level'))
+    local is_valid = level ~= nil
+                     and level > 0 and level <= 15
+                     and math.floor(level) == level
+
+    return setmetatable({
+        object = object,
+        geom_func = geom_func,
+        admin_level = is_valid and level or 15,
+        num_entries = 0,
+        has_name = false,
+        names = {},
+        address = {},
+        extratags = {}
+    }, Place)
+end
+
+-- Drop every tag of the wrapped object for which data.match(key, value)
+-- returns a true value. Without a match function nothing happens.
+function Place:delete(data)
+    local matches = data.match
+    if matches == nil then
+        return
+    end
+
+    local tags = self.object.tags
+    for key, value in pairs(tags) do
+        if matches(key, value) then
+            -- Clearing an existing field while traversing is allowed.
+            tags[key] = nil
+        end
+    end
+end
+
+-- Move every tag accepted by data.match(key, value) from the wrapped
+-- object into self.extratags, keeping the key unchanged.
+-- Returns the number of tags moved (0 when no match function is given).
+function Place:grab_extratags(data)
+    local matches = data.match
+    if matches == nil then
+        return 0
+    end
+
+    local tags = self.object.tags
+    local moved = 0
+
+    for key, value in pairs(tags) do
+        if matches(key, value) then
+            tags[key] = nil
+            self.extratags[key] = value
+            moved = moved + 1
+        end
+    end
+
+    return moved
+end
+
+-- Move tags accepted by data.match(key, value) into self.address.
+--
+-- Recognised fields of `data`:
+--   match           - function(key, value); nil disables the call
+--   include_on_name - when exactly true, a match marks the place as named
+--   out_key         - when set, the first matching tag is stored under
+--                     this key and the function returns 1 immediately;
+--                     further matching tags stay on the object
+--
+-- Without out_key the address key is the tag key with a leading
+-- 'addr:' or 'is_in:' removed. Returns the number of tags moved.
+function Place:grab_address(data)
+    local matches = data.match
+    if matches == nil then
+        return 0
+    end
+
+    local tags = self.object.tags
+    local grabbed = 0
+
+    for key, value in pairs(tags) do
+        if matches(key, value) then
+            tags[key] = nil
+
+            if data.include_on_name == true then
+                self.has_name = true
+            end
+
+            if data.out_key ~= nil then
+                self.address[data.out_key] = value
+                return 1
+            end
+
+            local target = key
+            if key:sub(1, 5) == 'addr:' then
+                target = key:sub(6)
+            elseif key:sub(1, 6) == 'is_in:' then
+                target = key:sub(7)
+            end
+
+            self.address[target] = value
+            grabbed = grabbed + 1
+        end
+    end
+
+    return grabbed
+end
+
+-- Store `value` under `key` in the address table, replacing any
+-- value already present for that key.
+function Place:set_address(key, value)
+    self.address[key] = value
+end
+
+-- Move every tag accepted by data.match(key, value) from the wrapped
+-- object into self.names, keeping the key unchanged.
+-- A match marks the place as named unless data.include_on_name is
+-- exactly false. Returns the number of tags moved.
+function Place:grab_name(data)
+    local matches = data.match
+    if matches == nil then
+        return 0
+    end
+
+    local tags = self.object.tags
+    local counts_as_name = data.include_on_name ~= false
+    local grabbed = 0
+
+    for key, value in pairs(tags) do
+        if matches(key, value) then
+            tags[key] = nil
+            self.names[key] = value
+            if counts_as_name then
+                self.has_name = true
+            end
+            grabbed = grabbed + 1
+        end
+    end
+
+    return grabbed
+end
+
+-- Forward to grab_tag(key) of the wrapped OSM object and return
+-- whatever it returns.
+function Place:grab_tag(key)
+    return self.object:grab_tag(key)
+end
+
+-- Return the tags table of the wrapped OSM object. This is the table
+-- itself, not a copy, so it reflects tags removed by the grab functions.
+function Place:tags()
+    return self.object.tags
+end
+
+-- Decide whether the tag k=v qualifies as a main tag and, if so,
+-- write a row for it.
+--
+--   k                - tag key, used as the class of the row
+--   v                - tag value; when nil it is looked up in the tags
+--   mtype            - nil (never write), a mode string, or a table that
+--                      maps values to modes with the default in slot [1]
+--   save_extra_mains - handed through to write_row()
+--
+-- Modes: 'always' writes unconditionally, 'named' only when the place
+-- has a name, 'named_with_key' only when tags '<k>:name' or
+-- '<k>:name:*' exist; those then replace the names for this one row.
+-- Returns the result of write_row(), or 0 when nothing was written.
+function Place:write_place(k, v, mtype, save_extra_mains)
+    if mtype == nil then
+        return 0
+    end
+
+    local value = v or self.object.tags[k]
+    if value == nil then
+        return 0
+    end
+
+    local mode = mtype
+    if type(mode) == 'table' then
+        mode = mode[value] or mode[1]
+    end
+
+    if mode == 'always' or (mode == 'named' and self.has_name) then
+        return self:write_row(k, value, save_extra_mains)
+    end
+
+    if mode ~= 'named_with_key' then
+        return 0
+    end
+
+    -- Collect '<k>:name' and '<k>:name:<suffix>', stripping '<k>:'.
+    local prefix = k .. ':name'
+    local prefix_len = #prefix
+    local key_names = {}
+    for tag_key, tag_value in pairs(self.object.tags) do
+        if tag_key:sub(1, prefix_len) == prefix then
+            local follower = tag_key:sub(prefix_len + 1, prefix_len + 1)
+            if follower == '' or follower == ':' then
+                key_names[tag_key:sub(#k + 2)] = tag_value
+            end
+        end
+    end
+
+    if next(key_names) == nil then
+        return 0
+    end
+
+    -- Swap in the key-specific names for this row only.
+    local saved_names = self.names
+    self.names = key_names
+    local written = self:write_row(k, value, save_extra_mains)
+    self.names = saved_names
+
+    return written
+end
+
+-- Insert one row with class k and type v into the place table.
+--
+-- The geometry is created with geom_func on first use and cached on the
+-- place. When save_extra_mains is set, all tags still on the object
+-- except k are added to the extratags for this row and taken out again
+-- afterwards. Returns 1 when a row was inserted, 0 for a null geometry.
+function Place:write_row(k, v, save_extra_mains)
+    local geometry = self.geometry
+    if geometry == nil then
+        geometry = self.geom_func(self.object)
+        self.geometry = geometry
+    end
+    if geometry:is_null() then
+        return 0
+    end
+
+    local tags = self.object.tags
+    local extratags = self.extratags
+
+    if save_extra_mains then
+        for tag_key, tag_value in pairs(tags) do
+            if tag_key ~= k then
+                extratags[tag_key] = tag_value
+            end
+        end
+    end
+
+    -- Empty tables are not passed on: next() yields nil for them.
+    place_table:insert{
+        class = k,
+        type = v,
+        admin_level = self.admin_level,
+        name = next(self.names) and self.names,
+        address = next(self.address) and self.address,
+        extratags = next(extratags) and extratags,
+        geometry = geometry
+    }
+
+    if save_extra_mains then
+        for tag_key in pairs(tags) do
+            extratags[tag_key] = nil
+        end
+    end
+
+    self.num_entries = self.num_entries + 1
+
+    return 1
+end
+
+
+-- Build a matcher function(key, value) from a declarative description.
+--
+--   data.keys - list of key patterns: 'abc' matches exactly, 'abc*'
+--               matches keys starting with 'abc', '*abc' matches keys
+--               ending in 'abc'; a lone '*' contributes nothing
+--   data.tags - table mapping a key to the list of values to match
+--
+-- The matcher returns true when any pattern or key/value pair applies
+-- and false otherwise. Returns nil when data is nil or an empty table.
+function tag_match(data)
+    if data == nil or next(data) == nil then
+        return nil
+    end
+
+    local checks = {}
+
+    local function add(check)
+        checks[#checks + 1] = check
+    end
+
+    if data.keys ~= nil then
+        for _, pattern in pairs(data.keys) do
+            if pattern:sub(1, 1) == '*' then
+                local suffix = pattern:sub(2)
+                if suffix ~= '' then
+                    add(function (k)
+                        return k:sub(-#suffix) == suffix
+                    end)
+                end
+            elseif pattern:sub(-1) == '*' then
+                local prefix = pattern:sub(1, -2)
+                add(function (k)
+                    return k:sub(1, #prefix) == prefix
+                end)
+            else
+                add(function (k)
+                    return k == pattern
+                end)
+            end
+        end
+    end
+
+    if data.tags ~= nil then
+        -- Turn each value list into a set for direct lookup.
+        local allowed = {}
+        for key, values in pairs(data.tags) do
+            local value_set = {}
+            for _, value in pairs(values) do
+                value_set[value] = true
+            end
+            allowed[key] = value_set
+        end
+        add(function (k, v)
+            local value_set = allowed[k]
+            return value_set ~= nil and value_set[v] ~= nil
+        end)
+    end
+
+    return function (k, v)
+        for i = 1, #checks do
+            if checks[i](k, v) then
+                return true
+            end
+        end
+        return false
+    end
+end
+
+
+-- Process functions for all data types
+-- Node callback: nodes are imported with a point geometry.
+function osm2pgsql.process_node(object)
+    local place = Place.new(object, function (o)
+        return o:as_point()
+    end)
+
+    process_tags(place)
+end
+
+-- Way callback: a way is imported as a polygon when as_polygon()
+-- yields a non-null geometry and as a linestring otherwise.
+function osm2pgsql.process_way(object)
+
+    local function area_or_line(o)
+        local area = o:as_polygon()
+
+        if not area:is_null() then
+            return area
+        end
+
+        return o:as_linestring()
+    end
+
+    process_tags(Place.new(object, area_or_line))
+end
+
+-- Geometry function for relations: returns o:as_multipolygon().
+-- Intended as a value in the style's RELATION_TYPES table.
+function relation_as_multipolygon(o)
+    return o:as_multipolygon()
+end
+
+-- Geometry function for relations: builds the multilinestring of the
+-- relation and returns the result of line_merge() on it.
+-- Intended as a value in the style's RELATION_TYPES table.
+function relation_as_multiline(o)
+    return o:as_multilinestring():line_merge()
+end
+
+-- Relation callback: the relation's 'type' tag selects the geometry
+-- function from RELATION_TYPES. Relations without an entry are ignored.
+function osm2pgsql.process_relation(object)
+    local geom_func = RELATION_TYPES[object.tags.type]
+
+    if geom_func == nil then
+        return
+    end
+
+    process_tags(Place.new(object, geom_func))
+end
+
+-- Sort the tags of a Place into names, address parts and extratags and
+-- write one row per main tag.
+--
+--   o - a Place object as created by Place.new()
+--
+-- The behaviour is driven by globals that the style file is expected to
+-- define: PRE_DELETE, PRE_EXTRAS, COUNTRY_TAGS, HOUSENAME_TAGS,
+-- HOUSENUMBER_TAGS, POSTCODES, INTERPOLATION_TAGS, ADD_TIGER_COUNTY,
+-- ADDRESS_TAGS, NAMES, REFS, POST_DELETE, POST_EXTRAS, MAIN_KEYS,
+-- MAIN_FALLBACK_KEYS and SAVE_EXTRA_MAINS. A matcher left at nil simply
+-- disables its step, except COUNTRY_TAGS which is called directly.
+function process_tags(o)
+    -- {class, type} to use when the object has address information
+    -- but no main tag.
+    local fallback
+
+    o:delete{match = PRE_DELETE}
+    o:grab_extratags{match = PRE_EXTRAS}
+
+    -- Exception for boundary/place double tagging
+    if o.object.tags.boundary == 'administrative' then
+        o:grab_extratags{match = function (k, v)
+            return k == 'place' and v:sub(1,3) ~= 'isl'
+        end}
+    end
+
+    -- address keys
+    -- Country tags only count when the value is exactly two bytes long.
+    o:grab_address{match=function (k, v) return COUNTRY_TAGS(k, v) and #v == 2 end,
+                   out_key='country'}
+    if o:grab_name{match=HOUSENAME_TAGS} > 0 then
+        fallback = {'place', 'house'}
+    end
+    if o:grab_address{match=HOUSENUMBER_TAGS, include_on_name = true} > 0 and fallback == nil then
+        fallback = {'place', 'house'}
+    end
+    if o:grab_address{match=POSTCODES, out_key='postcode'} > 0 and fallback == nil then
+        fallback = {'place', 'postcode'}
+    end
+
+    local is_interpolation = o:grab_address{match=INTERPOLATION_TAGS} > 0
+
+    if ADD_TIGER_COUNTY then
+        local county = o:grab_tag('tiger:county')
+        if county ~= nil then
+            -- Replace everything from the first comma on with ' county';
+            -- without a comma the suffix is appended instead.
+            -- Both results are local so that no global `num` is created.
+            local shortened, num = county:gsub(',.*', ' county')
+            if num == 0 then
+                shortened = shortened .. ' county'
+            end
+            o:set_address('tiger:county', shortened)
+        end
+    end
+    o:grab_address{match=ADDRESS_TAGS}
+
+    -- Interpolations are always written as place=houses and get no
+    -- further processing.
+    if is_interpolation then
+        o:write_place('place', 'houses', 'always', SAVE_EXTRA_MAINS)
+        return
+    end
+
+    -- name keys
+    o:grab_name{match = NAMES}
+    o:grab_name{match = REFS, include_on_name = false}
+
+    o:delete{match = POST_DELETE}
+    o:grab_extratags{match = POST_EXTRAS}
+
+    -- collect main keys
+    local num_mains = 0
+    for k, v in pairs(o:tags()) do
+        num_mains = num_mains + o:write_place(k, v, MAIN_KEYS[k], SAVE_EXTRA_MAINS)
+    end
+
+    if num_mains == 0 then
+        -- The first fallback key that yields a row wins. pairs() visits
+        -- MAIN_FALLBACK_KEYS in unspecified order.
+        for tag, mtype in pairs(MAIN_FALLBACK_KEYS) do
+            if o:write_place(tag, nil, mtype, SAVE_EXTRA_MAINS) > 0 then
+                return
+            end
+        end
+
+        if fallback ~= nil then
+            o:write_place(fallback[1], fallback[2], 'always', SAVE_EXTRA_MAINS)
+        end
+    end
+end
+
+
diff --git a/settings/import-extratags.lua b/settings/import-extratags.lua
new file mode 100644 (file)
index 0000000..535af3c
--- /dev/null
@@ -0,0 +1,130 @@
+require('flex-base')
+
+-- Maps the value of a relation's 'type' tag to the function that builds
+-- its geometry. process_relation() ignores relations of any other type.
+RELATION_TYPES = {
+    multipolygon = relation_as_multipolygon,
+    boundary = relation_as_multipolygon,
+    waterway = relation_as_multiline
+}
+
+-- Keys that may create a row in the place table, with the condition
+-- under which they do (evaluated by Place:write_place):
+--   'always'         - write a row unconditionally
+--   'named'          - write a row only when the object has a name
+--   'named_with_key' - write a row only when '<key>:name' tags exist
+-- A table gives the default condition in its first slot and overrides
+-- for individual tag values as named fields.
+MAIN_KEYS = {
+    emergency = 'always',
+    historic = 'always',
+    military = 'always',
+    natural = 'named',
+    landuse = 'named',
+    highway = {'always',
+               street_lamp = 'named',
+               traffic_signals = 'named',
+               service = 'named',
+               cycleway = 'named',
+               path = 'named',
+               footway = 'named',
+               steps = 'named',
+               bridleway = 'named',
+               track = 'named',
+               motorway_link = 'named',
+               trunk_link = 'named',
+               primary_link = 'named',
+               secondary_link = 'named',
+               tertiary_link = 'named'},
+    railway = 'named',
+    man_made = 'always',
+    aerialway = 'always',
+    boundary = {'named',
+                postal_code = 'named'},
+    aeroway = 'always',
+    amenity = 'always',
+    club = 'always',
+    craft = 'always',
+    leisure = 'always',
+    office = 'always',
+    mountain_pass = 'always',
+    shop = 'always',
+    tourism = 'always',
+    bridge = 'named_with_key',
+    tunnel = 'named_with_key',
+    waterway = 'named',
+    place = 'always'
+}
+
+-- Keys that are only tried when none of the MAIN_KEYS produced a row.
+-- process_tags() stops at the first of them that yields a row; it walks
+-- this table with pairs(), so the order of the attempts is unspecified.
+MAIN_FALLBACK_KEYS = {
+    building = 'named',
+    landuse = 'named',
+    junction = 'named',
+    healthcare = 'named'
+}
+
+
+-- Tags that process_tags() removes before anything else is evaluated:
+-- `keys` lists key patterns, `tags` lists key/value combinations.
+PRE_DELETE = tag_match{keys = {'note', 'note:*', 'source', 'source*', 'attribution',
+                               'comment', 'fixme', 'FIXME', 'created_by', 'NHD:*',
+                               'nhd:*', 'gnis:*', 'geobase:*', 'KSJ2:*', 'yh:*',
+                               'osak:*', 'naptan:*', 'CLC:*', 'import', 'it:fvg:*',
+                               'type', 'lacounty:*', 'ref:ruian:*', 'building:ruian:type',
+                               'ref:linz:*', 'is_in:postcode'},
+                       tags = {emergency = {'yes', 'no', 'fire_hydrant'},
+                               historic = {'yes', 'no'},
+                               military = {'yes', 'no'},
+                               natural = {'yes', 'no', 'coastline'},
+                               highway = {'no', 'turning_circle', 'mini_roundabout',
+                                          'noexit', 'crossing', 'give_way', 'stop'},
+                               railway = {'level_crossing', 'no', 'rail'},
+                               man_made = {'survey_point', 'cutline'},
+                               aerialway = {'pylon', 'no'},
+                               aeroway = {'no'},
+                               amenity = {'no'},
+                               club = {'no'},
+                               craft = {'no'},
+                               leisure = {'no'},
+                               office = {'no'},
+                               mountain_pass = {'no'},
+                               shop = {'no'},
+                               tourism = {'yes', 'no'},
+                               bridge = {'no'},
+                               tunnel = {'no'},
+                               waterway = {'riverbank'},
+                               building = {'no'},
+                               boundary = {'place'}}
+                      }
+
+-- Tags removed after address and name tags have been collected.
+-- (tiger:county and the tiger:zip_* postcodes are picked up earlier.)
+POST_DELETE = tag_match{keys = {'tiger:*'}}
+
+-- Tags moved into extratags right after the PRE_DELETE step, before
+-- names and addresses are evaluated.
+-- NOTE(review): process_tags() also reads POST_EXTRAS, which this style
+-- does not define; that step is then a no-op.
+PRE_EXTRAS = tag_match{keys = {'*:prefix', '*:suffix', 'name:prefix:*', 'name:suffix:*',
+                               'name:etymology', 'name:signed', 'name:botanical',
+                               'wikidata', '*:wikidata',
+                               'addr:street:name', 'addr:street:type'}
+                      }
+
+
+-- Tags stored in the name column; a match marks the object as named.
+NAMES = tag_match{keys = {'name', 'name:*',
+                          'int_name', 'int_name:*',
+                          'nat_name', 'nat_name:*',
+                          'reg_name', 'reg_name:*',
+                          'loc_name', 'loc_name:*',
+                          'old_name', 'old_name:*',
+                          'alt_name', 'alt_name:*', 'alt_name_*',
+                          'official_name', 'official_name:*',
+                          'place_name', 'place_name:*',
+                          'short_name', 'short_name:*', 'brand'}}
+
+-- Tags stored in the name column that do not make the object count as
+-- named (grabbed with include_on_name = false).
+REFS = tag_match{keys = {'ref', 'int_ref', 'nat_ref', 'reg_ref', 'loc_ref', 'old_ref',
+                         'iata', 'icao', 'pcode', 'pcode:*', 'ISO3166-2'}}
+
+-- Tags whose value is saved as address part 'postcode'.
+POSTCODES = tag_match{keys = {'postal_code', 'postcode', 'addr:postcode',
+                              'tiger:zip_left', 'tiger:zip_right'}}
+
+-- Tags saved as address part 'country'; process_tags() accepts them
+-- only when the value is two bytes long.
+COUNTRY_TAGS = tag_match{keys = {'country_code', 'ISO3166-1',
+                                 'addr:country_code', 'is_in:country_code',
+                                 'addr:country', 'is_in:country'}}
+
+-- Stored as a name; triggers the place=house fallback.
+HOUSENAME_TAGS = tag_match{keys = {'addr:housename'}}
+
+-- Stored as address parts; mark the object as named and trigger the
+-- place=house fallback.
+HOUSENUMBER_TAGS = tag_match{keys = {'addr:housenumber', 'addr:conscriptionnumber',
+                                     'addr:streetnumber'}}
+
+-- Objects with one of these tags are written as place=houses only.
+INTERPOLATION_TAGS = tag_match{keys = {'addr:interpolation'}}
+
+-- All remaining address tags; the 'addr:' / 'is_in:' prefix is dropped.
+ADDRESS_TAGS = tag_match{keys = {'addr:*', 'is_in:*'}}
+-- When true, tiger:county is saved as an address part ending in ' county'.
+ADD_TIGER_COUNTY = true
+
+-- When true, each row gets the object's remaining tags, apart from its
+-- own main key, as extratags.
+SAVE_EXTRA_MAINS = true
+
+
index 8c3d5c139a1ac90d60ff26bd4b9d83fc4a2e4965..83d7fe52e7d48309e825ff36e991c253e236fb33 100644 (file)
@@ -106,10 +106,10 @@ Feature: Tag evaluation
             n7002 Thighway=primary,bridge=yes,bridge:name=1
             """
         Then place contains exactly
             n7002 Thighway=primary,bridge=yes,bridge:name=1
             """
         Then place contains exactly
-            | object        | class   | type    | name        | extratags         |
-            | N7001         | highway | primary | 'name': '1' | -                 |
-            | N7002:highway | highway | primary | -           | 'bridge:name': '1'|
-            | N7002:bridge  | bridge  | yes     | 'name': '1' | 'bridge:name': '1'|
+            | object        | class   | type    | name        | extratags+bridge:name |
+            | N7001         | highway | primary | 'name': '1' | -                     |
+            | N7002:highway | highway | primary | -           | 1                     |
+            | N7002:bridge  | bridge  | yes     | 'name': '1' | 1                     |
 
 
     Scenario: Global fallback and skipping
 
 
     Scenario: Global fallback and skipping
@@ -153,13 +153,15 @@ Feature: Tag evaluation
             n10002 Tboundary=natural,place=city,name=B
             n10003 Tboundary=administrative,place=island,name=C
             """
             n10002 Tboundary=natural,place=city,name=B
             n10003 Tboundary=administrative,place=island,name=C
             """
-        Then place contains exactly
+        Then place contains
             | object          | class    | type           | extratags       |
             | N10001          | boundary | administrative | 'place': 'city' |
             | object          | class    | type           | extratags       |
             | N10001          | boundary | administrative | 'place': 'city' |
-            | N10002:boundary | boundary | natural        | - |
-            | N10002:place    | place    | city           | - |
-            | N10003:boundary | boundary | administrative | - |
-            | N10003:place    | place    | island         | - |
+        And place contains
+            | object          | class    | type           |
+            | N10002:boundary | boundary | natural        |
+            | N10002:place    | place    | city           |
+            | N10003:boundary | boundary | administrative |
+            | N10003:place    | place    | island         |
 
 
     Scenario: Shorten tiger:county tags
 
 
     Scenario: Shorten tiger:county tags
@@ -200,6 +202,6 @@ Feature: Tag evaluation
             n13002 Taddr:interpolation=even,place=city
             """
         Then place contains exactly
             n13002 Taddr:interpolation=even,place=city
             """
         Then place contains exactly
-            | object | class | type   | extratags       | address                 |
-            | N13001 | place | houses | -               | 'interpolation': 'odd'  |
-            | N13002 | place | houses | 'place': 'city' | 'interpolation': 'even' |
+            | object | class | type   | address                 |
+            | N13001 | place | houses | 'interpolation': 'odd'  |
+            | N13002 | place | houses | 'interpolation': 'even' |
index 6271f6b827dae37c9fb8423a0af016af7d19288e..0082bd081e3056445ea3dd6383fb3db4cea9611c 100644 (file)
@@ -18,6 +18,7 @@ def get_osm2pgsql_options(nominatim_env, fname, append):
                 osm2pgsql=str(nominatim_env.build_dir / 'osm2pgsql' / 'osm2pgsql'),
                 osm2pgsql_cache=50,
                 osm2pgsql_style=str(nominatim_env.get_test_config().get_import_style_file()),
                 osm2pgsql=str(nominatim_env.build_dir / 'osm2pgsql' / 'osm2pgsql'),
                 osm2pgsql_cache=50,
                 osm2pgsql_style=str(nominatim_env.get_test_config().get_import_style_file()),
+                osm2pgsql_style_path=nominatim_env.get_test_config().config_dir,
                 threads=1,
                 dsn=nominatim_env.get_libpq_dsn(),
                 flatnode_file='',
                 threads=1,
                 dsn=nominatim_env.get_libpq_dsn(),
                 flatnode_file='',