From 51ed55cc32580644544b8e38c570bbfdaf09b5a2 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann
Date: Thu, 3 Nov 2022 17:15:01 +0100
Subject: [PATCH] initial flex import scripts

Only implements the extratags style for the moment. Tests pass for
the same behaviour as the gazetteer output. Updates still need to be
done.
---
 CMakeLists.txt                         |   1 -
 nominatim/clicmd/args.py               |   1 +
 nominatim/tools/exec_utils.py          |  10 +-
 settings/flex-base.lua                 | 382 +++++++++++++++++++++++++
 settings/import-extratags.lua          | 130 +++++++++
 test/bdd/osm2pgsql/import/tags.feature |  26 +-
 test/bdd/steps/steps_osm_data.py       |   1 +
 7 files changed, 537 insertions(+), 14 deletions(-)
 create mode 100644 settings/flex-base.lua
 create mode 100644 settings/import-extratags.lua

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 036dda31..f5f776a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -63,7 +63,6 @@ if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
     endif()
     set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
     set(BUILD_TESTS off)
-    set(WITH_LUA off CACHE BOOL "")
     add_subdirectory(osm2pgsql)
     set(BUILD_TESTS ${BUILD_TESTS_SAVED})
 endif()
diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py
index 2f8273d6..b120ee73 100644
--- a/nominatim/clicmd/args.py
+++ b/nominatim/clicmd/args.py
@@ -184,6 +184,7 @@ class NominatimArgs:
         return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path,
                     osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
                     osm2pgsql_style=self.config.get_import_style_file(),
+                    osm2pgsql_style_path=self.config.config_dir,
                     threads=self.threads or default_threads,
                     dsn=self.config.get_libpq_dsn(),
                     flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''),
diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py
index 610e2182..675e070b 100644
--- a/nominatim/tools/exec_utils.py
+++ b/nominatim/tools/exec_utils.py
@@ -10,6 +10,7 @@ Helper functions for executing external programs.
 from typing import Any, Union, Optional, Mapping, IO
 from pathlib import Path
 import logging
+import os
 import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode
@@ -120,9 +121,16 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
            '--log-progress', 'true',
            '--number-processes', str(options['threads']),
            '--cache', str(options['osm2pgsql_cache']),
-           '--output', 'gazetteer',
            '--style', str(options['osm2pgsql_style'])
           ]
+    if str(options['osm2pgsql_style']).endswith('.lua'):
+        # Search the style directory first, then the user's LUA_PATH (';;' = Lua's default).
+        env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
+                                    os.environ.get('LUA_PATH', ';')))
+        cmd.extend(('--output', 'flex'))
+    else:
+        cmd.extend(('--output', 'gazetteer'))
+
     if options['append']:
         cmd.append('--append')
     else:
diff --git a/settings/flex-base.lua b/settings/flex-base.lua
new file mode 100644
index 00000000..d91299ad
--- /dev/null
+++ b/settings/flex-base.lua
@@ -0,0 +1,382 @@
+-- Core functions for Nominatim import flex style.
+--
+
+
+-- The single place table.
+place_table = osm2pgsql.define_table{
+    name = "place",
+    ids = { type = 'any', id_column = 'osm_id', type_column = 'osm_type' },
+    columns = {
+        { column = 'class', type = 'text', not_null = true },
+        { column = 'type', type = 'text', not_null = true },
+        { column = 'admin_level', type = 'smallint' },
+        { column = 'name', type = 'hstore' },
+        { column = 'address', type = 'hstore' },
+        { column = 'extratags', type = 'hstore' },
+        { column = 'geometry', type = 'geometry', projection = 'WGS84', not_null = true },
+    }
+}
+
+------------- Place class ------------------------------------------
+
+local Place = {}
+Place.__index = Place
+
+-- Wrap an OSM object; admin_level comes from its tags, anything but an integer 1..15 becomes 15.
+function Place.new(object, geom_func)
+    local self = setmetatable({}, Place)
+    self.object = object
+    self.geom_func = geom_func
+
+    self.admin_level = tonumber(self.object:grab_tag('admin_level'))
+    if self.admin_level == nil
+       or self.admin_level <= 0 or self.admin_level > 15
+       or math.floor(self.admin_level) ~= self.admin_level then
+        self.admin_level = 15
+    end
+
+    self.num_entries = 0
+    self.has_name = false
+    self.names = {}
+    self.address = {}
+    self.extratags = {}
+    return self
+end
+
+-- Remove every tag for which data.match(key, value) returns true.
+function Place:delete(data)
+    if data.match ~= nil then
+        for k, v in pairs(self.object.tags) do
+            if data.match(k, v) then
+                self.object.tags[k] = nil
+            end
+        end
+    end
+end
+
+-- Move the tags accepted by data.match into extratags; returns the number of tags moved.
+function Place:grab_extratags(data)
+    local count = 0
+    if data.match ~= nil then
+        for k, v in pairs(self.object.tags) do
+            if data.match(k, v) then
+                self.object.tags[k] = nil
+                self.extratags[k] = v
+                count = count + 1
+            end
+        end
+    end
+
+    return count
+end
+
+-- Move the tags accepted by data.match into the address, dropping an 'addr:' or 'is_in:'
+-- prefix. With data.out_key, one matching tag is stored under that key and 1 is returned.
+function Place:grab_address(data)
+    local count = 0
+    if data.match ~= nil then
+        for k, v in pairs(self.object.tags) do
+            if data.match(k, v) then
+                self.object.tags[k] = nil
+                if data.include_on_name == true then
+                    self.has_name = true
+                end
+
+                if data.out_key ~= nil then
+                    self.address[data.out_key] = v
+                    return 1
+                end
+
+                if k:sub(1, 5) == 'addr:' then
+                    self.address[k:sub(6)] = v
+                elseif k:sub(1, 6) == 'is_in:' then
+                    self.address[k:sub(7)] = v
+                else
+                    self.address[k] = v
+                end
+                count = count + 1
+            end
+        end
+    end
+
+    return count
+end
+
+function Place:set_address(key, value)
+    self.address[key] = value
+end
+
+-- Move matching tags into names; sets has_name unless data.include_on_name is false.
+function Place:grab_name(data)
+    local count = 0
+    if data.match ~= nil then
+        for k, v in pairs(self.object.tags) do
+            if data.match(k, v) then
+                self.object.tags[k] = nil
+                self.names[k] = v
+                if data.include_on_name ~= false then
+                    self.has_name = true
+                end
+                count = count + 1
+            end
+        end
+    end
+
+    return count
+end
+
+function Place:grab_tag(key)
+    return self.object:grab_tag(key)
+end
+
+function Place:tags()
+    return self.object.tags
+end
+
+-- Write the main tag k=v as directed by mtype; returns the number of rows written.
+-- mtype is 'always', 'named' (needs has_name) or 'named_with_key' (needs k:name or
+-- k:name:* tags, which become the names); a table is indexed by v, mtype[1] is the default.
+function Place:write_place(k, v, mtype, save_extra_mains)
+    if mtype == nil then
+        return 0
+    end
+
+    v = v or self.object.tags[k]
+    if v == nil then
+        return 0
+    end
+
+    if type(mtype) == 'table' then
+        mtype = mtype[v] or mtype[1]
+    end
+
+    if mtype == 'always' or (self.has_name and mtype == 'named') then
+        return self:write_row(k, v, save_extra_mains)
+    end
+
+    if mtype == 'named_with_key' then
+        local names = {}
+        local prefix = k .. ':name'
+        for namek, namev in pairs(self.object.tags) do
+            if namek:sub(1, #prefix) == prefix
+               and (#namek == #prefix
+                    or namek:sub(#prefix + 1, #prefix + 1) == ':') then
+                names[namek:sub(#k + 2)] = namev
+            end
+        end
+
+        if next(names) ~= nil then
+            local saved_names = self.names
+            self.names = names
+            local results = self:write_row(k, v, save_extra_mains)
+            self.names = saved_names
+            return results
+        end
+    end
+
+    return 0
+end
+
+-- Insert a row with class k and type v; returns 1, or 0 when the geometry is null.
+-- With save_extra_mains all remaining tags except k are in extratags for this row only.
+function Place:write_row(k, v, save_extra_mains)
+    if self.geometry == nil then
+        self.geometry = self.geom_func(self.object)
+    end
+    if self.geometry:is_null() then
+        return 0
+    end
+
+    if save_extra_mains then
+        for extra_k, extra_v in pairs(self.object.tags) do
+            if extra_k ~= k then
+                self.extratags[extra_k] = extra_v
+            end
+        end
+    end
+
+    place_table:insert{
+        class = k,
+        type = v,
+        admin_level = self.admin_level,
+        name = next(self.names) and self.names,
+        address = next(self.address) and self.address,
+        extratags = next(self.extratags) and self.extratags,
+        geometry = self.geometry
+    }
+
+    if save_extra_mains then
+        for extra_k in pairs(self.object.tags) do
+            self.extratags[extra_k] = nil
+        end
+    end
+
+    self.num_entries = self.num_entries + 1
+    return 1
+end
+
+-- Compile data.keys (exact key, 'prefix*' or '*suffix') and data.tags (key -> value list)
+-- into a function(k, v) returning true on any match; returns nil for nil or empty data.
+function tag_match(data)
+    if data == nil or next(data) == nil then
+        return nil
+    end
+
+    local tests = {}
+
+    if data.keys ~= nil then
+        for _, key in pairs(data.keys) do
+            if key:sub(1, 1) == '*' then
+                if #key > 1 then
+                    local suffix = key:sub(2)
+                    tests[#tests + 1] = function (k, v)
+                        return k:sub(-#suffix) == suffix
+                    end
+                end
+            elseif key:sub(#key, #key) == '*' then
+                local prefix = key:sub(1, #key - 1)
+                tests[#tests + 1] = function (k, v)
+                    return k:sub(1, #prefix) == prefix
+                end
+            else
+                tests[#tests + 1] = function (k, v)
+                    return k == key
+                end
+            end
+        end
+    end
+
+    if data.tags ~= nil then
+        local tags = {}
+        for k, vlist in pairs(data.tags) do
+            tags[k] = {}
+            for _, v in pairs(vlist) do
+                tags[k][v] = true
+            end
+        end
+        tests[#tests + 1] = function (k, v)
+            return tags[k] ~= nil and tags[k][v] ~= nil
+        end
+    end
+
+    return function (k, v)
+        for _, func in pairs(tests) do
+            if func(k, v) then
+                return true
+            end
+        end
+        return false
+    end
+end
+
+-- Process functions for all data types
+function osm2pgsql.process_node(object)
+    local function geom_func(o)
+        return o:as_point()
+    end
+
+    process_tags(Place.new(object, geom_func))
+end
+
+-- Ways are tried as polygon first and fall back to a linestring.
+function osm2pgsql.process_way(object)
+    local function geom_func(o)
+        local geom = o:as_polygon()
+        if geom:is_null() then
+            geom = o:as_linestring()
+        end
+        return geom
+    end
+
+    process_tags(Place.new(object, geom_func))
+end
+
+function relation_as_multipolygon(o)
+    return o:as_multipolygon()
+end
+
+function relation_as_multiline(o)
+    return o:as_multilinestring():line_merge()
+end
+
+-- Relations are only imported when RELATION_TYPES has a geometry function for their type.
+function osm2pgsql.process_relation(object)
+    local geom_func = RELATION_TYPES[object.tags.type]
+
+    if geom_func ~= nil then
+        process_tags(Place.new(object, geom_func))
+    end
+end
+
+-- Sort the tags of o into names, address and extratags as configured by the style globals.
+-- Writes place=houses for interpolations, else a row per main tag, else one fallback row.
+function process_tags(o)
+    local fallback
+
+    o:delete{match = PRE_DELETE}
+    o:grab_extratags{match = PRE_EXTRAS}
+
+    -- Exception for boundary/place double tagging
+    if o.object.tags.boundary == 'administrative' then
+        o:grab_extratags{match = function (k, v)
+            return k == 'place' and v:sub(1,3) ~= 'isl'
+        end}
+    end
+
+    -- address keys
+    o:grab_address{match=function (k, v) return COUNTRY_TAGS(k, v) and #v == 2 end,
+                   out_key='country'}
+    if o:grab_name{match=HOUSENAME_TAGS} > 0 then
+        fallback = {'place', 'house'}
+    end
+    if o:grab_address{match=HOUSENUMBER_TAGS, include_on_name = true} > 0 and fallback == nil then
+        fallback = {'place', 'house'}
+    end
+    if o:grab_address{match=POSTCODES, out_key='postcode'} > 0 and fallback == nil then
+        fallback = {'place', 'postcode'}
+    end
+
+    local is_interpolation = o:grab_address{match=INTERPOLATION_TAGS} > 0
+
+    if ADD_TIGER_COUNTY then
+        local v = o:grab_tag('tiger:county')
+        if v ~= nil then
+            local county, num = v:gsub(',.*', ' county')
+            if num == 0 then
+                county = county .. ' county'
+            end
+            o:set_address('tiger:county', county)
+        end
+    end
+    o:grab_address{match=ADDRESS_TAGS}
+
+    if is_interpolation then
+        o:write_place('place', 'houses', 'always', SAVE_EXTRA_MAINS)
+        return
+    end
+
+    -- name keys
+    o:grab_name{match = NAMES}
+    o:grab_name{match = REFS, include_on_name = false}
+
+    o:delete{match = POST_DELETE}
+    o:grab_extratags{match = POST_EXTRAS}
+
+    -- collect main keys
+    local num_mains = 0
+    for k, v in pairs(o:tags()) do
+        num_mains = num_mains + o:write_place(k, v, MAIN_KEYS[k], SAVE_EXTRA_MAINS)
+    end
+
+    if num_mains == 0 then
+        for tag, mtype in pairs(MAIN_FALLBACK_KEYS) do
+            if o:write_place(tag, nil, mtype, SAVE_EXTRA_MAINS) > 0 then
+                return
+            end
+        end
+
+        if fallback ~= nil then
+            o:write_place(fallback[1], fallback[2], 'always', SAVE_EXTRA_MAINS)
+        end
+    end
+end
diff --git a/settings/import-extratags.lua b/settings/import-extratags.lua
new file mode 100644
index 00000000..535af3c8
--- /dev/null
+++ b/settings/import-extratags.lua
@@ -0,0 +1,130 @@
+require('flex-base')
+
+RELATION_TYPES = {
+    multipolygon = relation_as_multipolygon,
+    boundary = relation_as_multipolygon,
+    waterway = relation_as_multiline
+}
+
+MAIN_KEYS = {
+    emergency = 'always',
+    historic = 'always',
+    military = 'always',
+    natural = 'named',
+    landuse = 'named',
+    highway = {'always',
+               street_lamp = 'named',
+               traffic_signals = 'named',
+               service = 'named',
+               cycleway = 'named',
+               path = 'named',
+               footway = 'named',
+               steps = 'named',
+               bridleway = 'named',
+               track = 'named',
+               motorway_link = 'named',
+               trunk_link = 'named',
+               primary_link = 'named',
+               secondary_link = 'named',
+               tertiary_link = 'named'},
+    railway = 'named',
+    man_made = 'always',
+    aerialway = 'always',
+    boundary = {'named',
+                postal_code = 'named'},
+    aeroway = 'always',
+    amenity = 'always',
+    club = 'always',
+    craft = 'always',
+    leisure = 'always',
+    office = 'always',
+    mountain_pass = 'always',
+    shop = 'always',
+    tourism = 'always',
+    bridge = 'named_with_key',
+    tunnel = 'named_with_key',
+    waterway = 'named',
+    place = 'always'
+}
+
+MAIN_FALLBACK_KEYS = {
+    building = 'named',
+    landuse = 'named',
+    junction = 'named',
+    healthcare = 'named'
+}
+
+
+PRE_DELETE = tag_match{keys = {'note', 'note:*', 'source', 'source*', 'attribution',
+                               'comment', 'fixme', 'FIXME', 'created_by', 'NHD:*',
+                               'nhd:*', 'gnis:*', 'geobase:*', 'KSJ2:*', 'yh:*',
+                               'osak:*', 'naptan:*', 'CLC:*', 'import', 'it:fvg:*',
+                               'type', 'lacounty:*', 'ref:ruian:*', 'building:ruian:type',
+                               'ref:linz:*', 'is_in:postcode'},
+                       tags = {emergency = {'yes', 'no', 'fire_hydrant'},
+                               historic = {'yes', 'no'},
+                               military = {'yes', 'no'},
+                               natural = {'yes', 'no', 'coastline'},
+                               highway = {'no', 'turning_circle', 'mini_roundabout',
+                                          'noexit', 'crossing', 'give_way', 'stop'},
+                               railway = {'level_crossing', 'no', 'rail'},
+                               man_made = {'survey_point', 'cutline'},
+                               aerialway = {'pylon', 'no'},
+                               aeroway = {'no'},
+                               amenity = {'no'},
+                               club = {'no'},
+                               craft = {'no'},
+                               leisure = {'no'},
+                               office = {'no'},
+                               mountain_pass = {'no'},
+                               shop = {'no'},
+                               tourism = {'yes', 'no'},
+                               bridge = {'no'},
+                               tunnel = {'no'},
+                               waterway = {'riverbank'},
+                               building = {'no'},
+                               boundary = {'place'}}
+                      }
+
+POST_DELETE = tag_match{keys = {'tiger:*'}}
+
+PRE_EXTRAS = tag_match{keys = {'*:prefix', '*:suffix', 'name:prefix:*', 'name:suffix:*',
+                               'name:etymology', 'name:signed', 'name:botanical',
+                               'wikidata', '*:wikidata',
+                               'addr:street:name', 'addr:street:type'}
+                      }
+
+
+NAMES = tag_match{keys = {'name', 'name:*',
+                          'int_name', 'int_name:*',
+                          'nat_name', 'nat_name:*',
+                          'reg_name', 'reg_name:*',
+                          'loc_name', 'loc_name:*',
+                          'old_name', 'old_name:*',
+                          'alt_name', 'alt_name:*', 'alt_name_*',
+                          'official_name', 'official_name:*',
+                          'place_name', 'place_name:*',
+                          'short_name', 'short_name:*', 'brand'}}
+
+REFS = tag_match{keys = {'ref', 'int_ref', 'nat_ref', 'reg_ref', 'loc_ref', 'old_ref',
+                         'iata', 'icao', 'pcode', 'pcode:*', 'ISO3166-2'}}
+
+POSTCODES = tag_match{keys = {'postal_code', 'postcode', 'addr:postcode',
+                              'tiger:zip_left', 'tiger:zip_right'}}
+
+COUNTRY_TAGS = tag_match{keys = {'country_code', 'ISO3166-1',
+                                 'addr:country_code', 'is_in:country_code',
+                                 'addr:country', 'is_in:country'}}
+
+HOUSENAME_TAGS = tag_match{keys = {'addr:housename'}}
+
+HOUSENUMBER_TAGS = tag_match{keys = {'addr:housenumber', 'addr:conscriptionnumber',
+                                     'addr:streetnumber'}}
+
+INTERPOLATION_TAGS = tag_match{keys = {'addr:interpolation'}}
+
+ADDRESS_TAGS = tag_match{keys = {'addr:*', 'is_in:*'}}
+ADD_TIGER_COUNTY = true
+
+SAVE_EXTRA_MAINS = true
+
diff --git a/test/bdd/osm2pgsql/import/tags.feature b/test/bdd/osm2pgsql/import/tags.feature
index 8c3d5c13..83d7fe52 100644
--- a/test/bdd/osm2pgsql/import/tags.feature
+++ b/test/bdd/osm2pgsql/import/tags.feature
@@ -106,10 +106,10 @@ Feature: Tag evaluation
             n7002 Thighway=primary,bridge=yes,bridge:name=1
             """
         Then place contains exactly
-            | object        | class   | type    | name        | extratags |
-            | N7001         | highway | primary | 'name': '1' | -         |
-            | N7002:highway | highway | primary | -           | 'bridge:name': '1'|
-            | N7002:bridge  | bridge  | yes     | 'name': '1' | 'bridge:name': '1'|
+            | object        | class   | type    | name        | extratags+bridge:name |
+            | N7001         | highway | primary | 'name': '1' | -                     |
+            | N7002:highway | highway | primary | -           | 1                     |
+            | N7002:bridge  | bridge  | yes     | 'name': '1' | 1                     |
 
 
     Scenario: Global fallback and skipping
@@ -153,13 +153,15 @@ Feature: Tag evaluation
             n10002 Tboundary=natural,place=city,name=B
             n10003 Tboundary=administrative,place=island,name=C
             """
-        Then place contains exactly
+        Then place contains
             | object          | class    | type           | extratags       |
             | N10001          | boundary | administrative | 'place': 'city' |
-            | N10002:boundary | boundary | natural        | -               |
-            | N10002:place    | place    | city           | -               |
-            | N10003:boundary | boundary | administrative | -               |
-            | N10003:place    | place    | island         | -               |
+        And place contains
+            | object          | class    | type           |
+            | N10002:boundary | boundary | natural        |
+            | N10002:place    | place    | city           |
+            | N10003:boundary | boundary | administrative |
+            | N10003:place    | place    | island         |
 
 
     Scenario: Shorten tiger:county tags
@@ -200,6 +202,6 @@ Feature: Tag evaluation
             n13002 Taddr:interpolation=even,place=city
             """
         Then place contains exactly
-            | object | class | type   | extratags       | address                 |
-            | N13001 | place | houses | -               | 'interpolation': 'odd'  |
-            | N13002 | place | houses | 'place': 'city' | 'interpolation': 'even' |
+            | object | class | type   | address                 |
+            | N13001 | place | houses | 'interpolation': 'odd'  |
+            | N13002 | place | houses | 'interpolation': 'even' |
diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py
index 6271f6b8..0082bd08 100644
--- a/test/bdd/steps/steps_osm_data.py
+++ b/test/bdd/steps/steps_osm_data.py
@@ -18,6 +18,7 @@ def get_osm2pgsql_options(nominatim_env, fname, append):
                 osm2pgsql=str(nominatim_env.build_dir / 'osm2pgsql' / 'osm2pgsql'),
                 osm2pgsql_cache=50,
                 osm2pgsql_style=str(nominatim_env.get_test_config().get_import_style_file()),
+                osm2pgsql_style_path=nominatim_env.get_test_config().config_dir,
                 threads=1,
                 dsn=nominatim_env.get_libpq_dsn(),
                 flatnode_file='',
-- 
2.39.5