From 28ee3d0949a749fab1a08000b899e232c6a8c29e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 20 Aug 2021 21:53:13 +0200 Subject: [PATCH] move linking of places to the preparation stage Linked places may bring in extra names. These names need to be processed by the tokenizer. That means that the linking needs to be done before the data is handed to the tokenizer. Move finding the linked place into the preparation stage and update the name fields. Everything else is still done in the indexing stage. --- lib-sql/functions/placex_triggers.sql | 36 ++++++++++++++++++++------- nominatim/indexer/runners.py | 11 +++++--- test/bdd/db/query/linking.feature | 22 ++++++++++++++++ test/python/test_indexing.py | 7 ++++-- 4 files changed, 61 insertions(+), 15 deletions(-) create mode 100644 test/bdd/db/query/linking.feature diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index e3be9503..fa7156ec 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -11,8 +11,11 @@ CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, OUT name HSTORE, OUT address HSTORE, - OUT country_feature VARCHAR) + OUT country_feature VARCHAR, + OUT linked_place_id BIGINT) AS $$ +DECLARE + location RECORD; BEGIN -- For POI nodes, check if the address should be derived from a surrounding -- building. @@ -34,6 +37,18 @@ BEGIN address := address - '_unlisted_place'::TEXT; name := p.name; + -- Names of linked places need to be merged in, so search for a linkable + -- place already here. + SELECT * INTO location FROM find_linked_place(p); + + IF location.place_id is not NULL THEN + linked_place_id := location.place_id; + + IF NOT location.name IS NULL THEN + name := location.name || name; + END IF; + END IF; + country_feature := CASE WHEN p.admin_level = 2 and p.class = 'boundary' and p.type = 'administrative' and p.osm_type = 'R' @@ -683,6 +698,8 @@ DECLARE nameaddress_vector INTEGER[]; addr_nameaddress_vector INTEGER[]; + linked_place BIGINT; + linked_node_id BIGINT; linked_importance FLOAT; linked_wikipedia TEXT; @@ -718,9 +735,14 @@ BEGIN NEW.extratags := NEW.extratags - 'linked_place'::TEXT; + -- NEW.linked_place_id contains the precomputed linkee. Save this and restore + -- the previous link status. + linked_place := NEW.linked_place_id; + NEW.linked_place_id := OLD.linked_place_id; + IF NEW.linked_place_id is not null THEN NEW.token_info := null; - {% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %} + {% if debug %}RAISE WARNING 'place already linked to %', OLD.linked_place_id;{% endif %} RETURN NEW; END IF; @@ -956,8 +978,9 @@ BEGIN -- --------------------------------------------------------------------------- -- Full indexing {% if debug %}RAISE WARNING 'Using full index mode for % %', NEW.osm_type, NEW.osm_id;{% endif %} - SELECT * INTO location FROM find_linked_place(NEW); - IF location.place_id is not null THEN + IF linked_place is not null THEN + SELECT * INTO location FROM placex WHERE place_id = linked_place; + {% if debug %}RAISE WARNING 'Linked %', location;{% endif %} -- Use the linked point as the centre point of the geometry, @@ -974,11 +997,6 @@ BEGIN NEW.rank_address := location.rank_address; END IF; - -- merge in the label name - IF NOT location.name IS NULL THEN - NEW.name := location.name || NEW.name; - END IF; - -- merge in extra tags NEW.extratags := hstore('linked_' || location.class, location.type) || coalesce(location.extratags, ''::hstore) diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py index 068d7d0f..29261ee5 100644 --- a/nominatim/indexer/runners.py +++ b/nominatim/indexer/runners.py @@ -16,6 +16,7 @@ class AbstractPlacexRunner: """ Returns SQL commands for indexing of the placex table. """ SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ') + UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)" def __init__(self, rank, analyzer): self.rank = rank @@ -27,10 +28,11 @@ class AbstractPlacexRunner: def _index_sql(num_places): return pysql.SQL( """ UPDATE placex - SET indexed_status = 0, address = v.addr, token_info = v.ti - FROM (VALUES {}) as v(id, addr, ti) + SET indexed_status = 0, address = v.addr, token_info = v.ti, + name = v.name, linked_place_id = v.linked_place_id + FROM (VALUES {}) as v(id, name, addr, linked_place_id, ti) WHERE place_id = v.id - """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)) + """).format(_mk_valuelist(AbstractPlacexRunner.UPDATE_LINE, num_places)) @staticmethod @@ -43,7 +45,8 @@ class AbstractPlacexRunner: def index_places(self, worker, places): values = [] for place in places: - values.extend((place[x] for x in ('place_id', 'address'))) + for field in ('place_id', 'name', 'address', 'linked_place_id'): + values.append(place[field]) values.append(psycopg2.extras.Json(self.analyzer.process_place(place))) worker.perform(self._index_sql(len(places)), values) diff --git a/test/bdd/db/query/linking.feature b/test/bdd/db/query/linking.feature new file mode 100644 index 00000000..4e6c47d8 --- /dev/null +++ b/test/bdd/db/query/linking.feature @@ -0,0 +1,22 @@ +@DB +Feature: Searching linked places + Tests that information from linked places can be searched correctly + + Scenario: Additional names from linked places are searchable + Given the places + | osm | class | type | admin | name | geometry | + | R13 | boundary | administrative | 6 | Garbo | poly-area:0.1 | + Given the places + | osm | class | type | admin | name+name:it | geometry | + | N2 | place | hamlet | 15 | Vario | 0.006 0.00001 | + And the relations + | id | members | tags+type | + | 13 | N2:label | boundary | + When importing + Then placex contains + | object | linked_place_id | + | N2 | R13 | + When sending search query "Vario" + Then results contain + | osm | + | R13 | diff --git a/test/python/test_indexing.py b/test/python/test_indexing.py index 9873e7d7..60ad0bc4 100644 --- a/test/python/test_indexing.py +++ b/test/python/test_indexing.py @@ -19,8 +19,10 @@ class IndexerTestDB: with self.conn.cursor() as cur: cur.execute('CREATE EXTENSION hstore') cur.execute("""CREATE TABLE placex (place_id BIGINT, + name HSTORE, class TEXT, type TEXT, + linked_place_id BIGINT, rank_address SMALLINT, rank_search SMALLINT, indexed_status SMALLINT, @@ -55,11 +57,12 @@ class IndexerTestDB: cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, OUT name HSTORE, OUT address HSTORE, - OUT country_feature VARCHAR) + OUT country_feature VARCHAR, + OUT linked_place_id BIGINT) AS $$ BEGIN address := p.address; - name := p.address; + name := p.name; END; $$ LANGUAGE plpgsql STABLE; """) -- 2.39.5