git.openstreetmap.org Git - nominatim.git/commitdiff
move postcode normalization into tokenizer
author Sarah Hoffmann <lonvia@denofr.de>
Sun, 25 Apr 2021 16:26:36 +0000 (18:26 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Fri, 30 Apr 2021 09:30:51 +0000 (11:30 +0200)
12 files changed:
lib-sql/functions/interpolation.sql
lib-sql/functions/placex_triggers.sql
lib-sql/tokenizer/legacy_tokenizer.sql
nominatim/clicmd/setup.py
nominatim/indexer/indexer.py
nominatim/indexer/runners.py
nominatim/tokenizer/legacy_tokenizer.py
nominatim/tools/postcodes.py
test/python/conftest.py
test/python/dummy_tokenizer.py
test/python/test_indexing.py
test/python/test_tools_postcodes.py
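
The change moves postcode handling out of the SQL triggers and into the tokenizer: placex_triggers.sql and interpolation.sql no longer call getorcreate_postcode_id(), while the legacy tokenizer's name analyzer normalizes each postcode itself, keeps the result in a small LRU cache and only calls the new create_postcode_id() SQL function on a cache miss. The standalone Python sketch below is not part of the commit; add_postcode(), db_calls and fake_create_postcode_id() are illustrative stand-ins for LegacyNameAnalyzer._add_postcode() and the database round trip, shown here only to make the intended caching behaviour concrete.

# Minimal illustrative sketch (not part of the commit): add_postcode(),
# db_calls and fake_create_postcode_id() stand in for
# LegacyNameAnalyzer._add_postcode() and the create_postcode_id() SQL call.
import re
from collections import OrderedDict


class _LRU:
    """ Least-recently-used cache; the value is produced on a cache miss. """

    def __init__(self, maxsize=128):
        self.data = OrderedDict()
        self.maxsize = maxsize

    def get(self, key, generator):
        value = self.data.get(key)
        if value is not None:
            self.data.move_to_end(key)         # refresh recency on a hit
        else:
            value = generator(key)             # miss: create the entry
            if len(self.data) >= self.maxsize:
                self.data.popitem(last=False)  # evict the oldest entry
            self.data[key] = value
        return value


db_calls = []

def fake_create_postcode_id(postcode):
    """ Stand-in for 'SELECT create_postcode_id(%s)'; records each round trip. """
    db_calls.append(postcode)
    return True

def add_postcode(cache, postcode):
    """ Ensure a normalized postcode reaches the word table only once. """
    if not postcode or re.search(r'[:,;]', postcode):
        return                                 # multi-value postcodes are skipped
    cache.get(postcode.strip().upper(), fake_create_postcode_id)


cache = _LRU(maxsize=32)
add_postcode(cache, ' ab1 2cd ')    # normalized to 'AB1 2CD', triggers a DB call
add_postcode(cache, 'AB1 2CD')      # cache hit, no second DB call
add_postcode(cache, '12345;67890')  # rejected: contains a list separator
print(db_calls)                     # -> ['AB1 2CD']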

diff --git a/lib-sql/functions/interpolation.sql b/lib-sql/functions/interpolation.sql
index a797cad3ac1de74b3500eb53d90bbd2111b826f1..8bfc307b7d9f1dd2147f190cb3a168695118fecd 100644 (file)
@@ -12,39 +12,47 @@ $$
 LANGUAGE plpgsql IMMUTABLE;
 
 
+CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+RETURNS HSTORE
+  AS $$
+DECLARE
+  location RECORD;
+  waynodes BIGINT[];
+BEGIN
+  IF akeys(in_address) != ARRAY['interpolation'] THEN
+    RETURN in_address;
+  END IF;
+
+  SELECT nodes INTO waynodes FROM planet_osm_ways WHERE id = wayid;
+  FOR location IN
+    SELECT placex.address, placex.osm_id FROM placex
+     WHERE osm_type = 'N' and osm_id = ANY(waynodes)
+           and placex.address is not null
+           and (placex.address ? 'street' or placex.address ? 'place')
+           and indexed_status < 100
+  LOOP
+    -- mark it as a derived address
+    RETURN location.address || in_address || hstore('_inherited', '');
+  END LOOP;
+
+  RETURN in_address;
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+
+
 -- find the parent road of the cut road parts
-CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT,
+CREATE OR REPLACE FUNCTION get_interpolation_parent(street TEXT,
                                                     place TEXT, partition SMALLINT,
                                                     centroid GEOMETRY, geom GEOMETRY)
   RETURNS BIGINT
   AS $$
 DECLARE
-  addr_street TEXT;
-  addr_place TEXT;
   parent_place_id BIGINT;
-
-  waynodes BIGINT[];
-
   location RECORD;
 BEGIN
-  addr_street = street;
-  addr_place = place;
-
-  IF addr_street is null and addr_place is null THEN
-    select nodes from planet_osm_ways where id = wayid INTO waynodes;
-    FOR location IN SELECT placex.address from placex
-                    where osm_type = 'N' and osm_id = ANY(waynodes)
-                          and placex.address is not null
-                          and (placex.address ? 'street' or placex.address ? 'place')
-                          and indexed_status < 100
-                    limit 1 LOOP
-      addr_street = location.address->'street';
-      addr_place = location.address->'place';
-    END LOOP;
-  END IF;
-
-  parent_place_id := find_parent_for_address(addr_street, addr_place,
-                                             partition, centroid);
+  parent_place_id := find_parent_for_address(street, place, partition, centroid);
 
   IF parent_place_id is null THEN
     FOR location IN SELECT place_id FROM placex
@@ -147,17 +155,20 @@ BEGIN
   NEW.interpolationtype = NEW.address->'interpolation';
 
   place_centroid := ST_PointOnSurface(NEW.linegeo);
-  NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street',
+  NEW.parent_place_id = get_interpolation_parent(NEW.address->'street',
                                                  NEW.address->'place',
                                                  NEW.partition, place_centroid, NEW.linegeo);
 
   IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
     interpol_postcode := NEW.address->'postcode';
-    housenum := getorcreate_postcode_id(NEW.address->'postcode');
   ELSE
     interpol_postcode := NULL;
   END IF;
 
+  IF NEW.address ? '_inherited' THEN
+    NEW.address := hstore('interpolation', NEW.interpolationtype);
+  END IF;
+
   -- if the line was newly inserted, split the line as necessary
   IF OLD.indexed_status = 1 THEN
       select nodes from planet_osm_ways where id = NEW.osm_id INTO waynodes;
diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql
index ca9ab5cc550f2889cc3b1653ed75a8aa2067e6ab..922d79bd51b3b0da6d9de7d7e7ee6eb147edcd3b 100644 (file)
@@ -817,10 +817,6 @@ BEGIN
   IF NEW.address is not NULL THEN
       addr_street := NEW.address->'street';
       addr_place := NEW.address->'place';
-
-      IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN
-        i := getorcreate_postcode_id(NEW.address->'postcode');
-      END IF;
   END IF;
 
   NEW.postcode := null;
diff --git a/lib-sql/tokenizer/legacy_tokenizer.sql b/lib-sql/tokenizer/legacy_tokenizer.sql
index 916ba9aedc1e215906037be2bcf01ec7af583c28..4c22424ec74afa2504f51bbebdf9498fba2e740a 100644 (file)
@@ -34,6 +34,13 @@ AS $$
 $$ LANGUAGE SQL IMMUTABLE STRICT;
 
 
+CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+  RETURNS TEXT
+AS $$
+  SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
+$$ LANGUAGE SQL IMMUTABLE STRICT;
+
+
 -- Return token info that should be saved permanently in the database.
 CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
   RETURNS JSONB
@@ -133,26 +140,26 @@ $$
 LANGUAGE plpgsql;
 
 
-CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
-  RETURNS INTEGER
+CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
+  RETURNS BOOLEAN
   AS $$
 DECLARE
+  r RECORD;
   lookup_token TEXT;
-  lookup_word TEXT;
   return_word_id INTEGER;
 BEGIN
-  lookup_word := upper(trim(postcode));
-  lookup_token := ' ' || make_standard_name(lookup_word);
-  SELECT min(word_id) FROM word
-    WHERE word_token = lookup_token and word = lookup_word
+  lookup_token := ' ' || make_standard_name(postcode);
+  FOR r IN
+    SELECT word_id FROM word
+    WHERE word_token = lookup_token and word = postcode
           and class='place' and type='postcode'
-    INTO return_word_id;
-  IF return_word_id IS NULL THEN
-    return_word_id := nextval('seq_word');
-    INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
-                             'place', 'postcode', null, 0);
-  END IF;
-  RETURN return_word_id;
+  LOOP
+    RETURN false;
+  END LOOP;
+
+  INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
+                           'place', 'postcode', null, 0);
+  RETURN true;
 END;
 $$
 LANGUAGE plpgsql;
diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py
index 499eff7673dc8f1d393eeb889c731eb980dc216f..1ce9cf3e0039642641c0c8ead82f52ca9b2b7d2c 100644 (file)
@@ -116,7 +116,8 @@ class SetupAll:
 
         if args.continue_at is None or args.continue_at == 'load-data':
             LOG.warning('Calculate postcodes')
-            postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)
+            postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir,
+                                       tokenizer)
 
         if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
             if args.continue_at is not None and args.continue_at != 'load-data':
diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py
index ea7b0e5986e22c0fad8c9b826bb03b2af3d1a29c..d0c6ea0ca66c8c0927cbfb718e6cd7518bbe8238 100644 (file)
@@ -147,7 +147,7 @@ class Indexer:
 
             if maxrank == 30:
                 self._index(runners.RankRunner(0, analyzer))
-                self._index(runners.InterpolationRunner(), 20)
+                self._index(runners.InterpolationRunner(analyzer), 20)
                 self._index(runners.RankRunner(30, analyzer), 20)
             else:
                 self._index(runners.RankRunner(maxrank, analyzer))
diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py
index 2bf9e51632a3ba84940755d005481aab9a9eed0b..75429fe427428a9bc79b7c9ec9a4f491a873028c 100644 (file)
@@ -25,7 +25,7 @@ class AbstractPlacexRunner:
                    SET indexed_status = 0, address = v.addr, token_info = v.ti
                    FROM (VALUES {}) as v(id, addr, ti)
                    WHERE place_id = v.id
-               """.format(','.join(["(%s, %s::hstore, %s::json)"]  * num_places))
+               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
 
 
     def index_places(self, worker, places):
@@ -82,6 +82,10 @@ class InterpolationRunner:
         location_property_osmline.
     """
 
+    def __init__(self, analyzer):
+        self.analyzer = analyzer
+
+
     @staticmethod
     def name():
         return "interpolation lines (location_property_osmline)"
@@ -93,15 +97,30 @@ class InterpolationRunner:
 
     @staticmethod
     def sql_get_objects():
-        return """SELECT place_id FROM location_property_osmline
+        return """SELECT place_id, get_interpolation_address(address, osm_id) as address
+                  FROM location_property_osmline
                   WHERE indexed_status > 0
                   ORDER BY geometry_sector"""
 
+
     @staticmethod
-    def index_places(worker, ids):
-        worker.perform(""" UPDATE location_property_osmline
-                           SET indexed_status = 0 WHERE place_id IN ({})
-                       """.format(','.join((str(i[0]) for i in ids))))
+    @functools.lru_cache(maxsize=1)
+    def _index_sql(num_places):
+        return """ UPDATE location_property_osmline
+                   SET indexed_status = 0, address = v.addr, token_info = v.ti
+                   FROM (VALUES {}) as v(id, addr, ti)
+                   WHERE place_id = v.id
+               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
+
+
+    def index_places(self, worker, places):
+        values = []
+        for place in places:
+            values.extend((place[x] for x in ('place_id', 'address')))
+            values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+
+        worker.perform(self._index_sql(len(places)), values)
+
 
 
 class PostcodeRunner:
diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py
index 6ffdc4ef46cc0269348428d8ba7abb76d872f3b0..b95a6208f3fac47e978669f38e476ef5c16fc49f 100644 (file)
@@ -1,6 +1,7 @@
 """
 Tokenizer implementing normalisation as used before Nominatim 4.
 """
+from collections import OrderedDict
 import logging
 import re
 import shutil
@@ -213,6 +214,15 @@ class LegacyNameAnalyzer:
             self.conn.close()
             self.conn = None
 
+
+    def add_postcodes_from_db(self):
+        """ Add postcodes from the location_postcode table to the word table.
+        """
+        with self.conn.cursor() as cur:
+            cur.execute("""SELECT count(create_postcode_id(pc))
+                           FROM (SELECT distinct(postcode) as pc
+                                 FROM location_postcode) x""")
+
     def process_place(self, place):
         """ Determine tokenizer information about the given place.
 
@@ -226,11 +236,25 @@ class LegacyNameAnalyzer:
         address = place.get('address')
 
         if address:
+            self._add_postcode(address.get('postcode'))
             token_info.add_housenumbers(self.conn, address)
 
         return token_info.data
 
 
+    def _add_postcode(self, postcode):
+        """ Make sure the normalized postcode is present in the word table.
+        """
+        if not postcode or re.search(r'[:,;]', postcode) is not None:
+            return
+
+        def _create_postcode_from_db(pcode):
+            with self.conn.cursor() as cur:
+                cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
+
+        self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
+
+
 class _TokenInfo:
     """ Collect token information to be sent back to the database.
     """
@@ -285,6 +309,32 @@ class _TokenInfo:
             self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
 
 
+class _LRU:
+    """ Least recently used cache that accepts a generator function to
+        produce the item when there is a cache miss.
+    """
+
+    def __init__(self, maxsize=128):
+        self.data = OrderedDict()
+        self.maxsize = maxsize
+
+    def get(self, key, generator):
+        """ Get the item with the given key from the cache. If nothing
+            is found in the cache, generate the value through the
+            generator function and store it in the cache.
+        """
+        value = self.data.get(key)
+        if value is not None:
+            self.data.move_to_end(key)
+        else:
+            value = generator(key)
+            if len(self.data) >= self.maxsize:
+                self.data.popitem(last=False)
+            self.data[key] = value
+
+        return value
+
+
 class _TokenCache:
     """ Cache for token information to avoid repeated database queries.
 
@@ -292,6 +342,9 @@ class _TokenCache:
         analyzer.
     """
     def __init__(self, conn):
+        # various LRU caches
+        self.postcodes = _LRU(maxsize=32)
+
         # Lookup houseunumbers up to 100 and cache them
         with conn.cursor() as cur:
             cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py
index 0a568cbafc73c125c4f9d34b7141b42b05eb6b11..78bd8cb9490c5646754cef2ef2bbf2348d5e2a74 100644 (file)
@@ -6,7 +6,7 @@ of artificial postcode centroids.
 from nominatim.db.utils import execute_file
 from nominatim.db.connection import connect
 
-def import_postcodes(dsn, project_dir):
+def import_postcodes(dsn, project_dir, tokenizer):
     """ Set up the initial list of postcodes.
     """
 
@@ -41,10 +41,11 @@ def import_postcodes(dsn, project_dir):
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
                 SELECT nextval('seq_place'), 1, country_code,
-                       upper(trim (both ' ' from address->'postcode')) as pc,
+                       token_normalized_postcode(address->'postcode') as pc,
                        ST_Centroid(ST_Collect(ST_Centroid(geometry)))
                   FROM placex
-                 WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
+                 WHERE address ? 'postcode'
+                       and token_normalized_postcode(address->'postcode') is not null
                        AND geometry IS NOT null
                  GROUP BY country_code, pc
             """)
@@ -52,9 +53,10 @@ def import_postcodes(dsn, project_dir):
             cur.execute("""
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
-                SELECT nextval('seq_place'), 1, 'us', postcode,
+                SELECT nextval('seq_place'), 1, 'us',
+                       token_normalized_postcode(postcode),
                        ST_SetSRID(ST_Point(x,y),4326)
-                  FROM us_postcode WHERE postcode NOT IN
+                  FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
                         (SELECT postcode FROM location_postcode
                           WHERE country_code = 'us')
             """)
@@ -62,8 +64,9 @@ def import_postcodes(dsn, project_dir):
             cur.execute("""
                 INSERT INTO location_postcode
                  (place_id, indexed_status, country_code, postcode, geometry)
-                SELECT nextval('seq_place'), 1, 'gb', postcode, geometry
-                  FROM gb_postcode WHERE postcode NOT IN
+                SELECT nextval('seq_place'), 1, 'gb',
+                       token_normalized_postcode(postcode), geometry
+                  FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
                            (SELECT postcode FROM location_postcode
                              WHERE country_code = 'gb')
             """)
@@ -72,9 +75,7 @@ def import_postcodes(dsn, project_dir):
                     DELETE FROM word WHERE class='place' and type='postcode'
                     and word NOT IN (SELECT postcode FROM location_postcode)
             """)
-
-            cur.execute("""
-                SELECT count(getorcreate_postcode_id(v)) FROM
-                (SELECT distinct(postcode) as v FROM location_postcode) p
-            """)
         conn.commit()
+
+        with tokenizer.name_analyzer() as analyzer:
+            analyzer.add_postcodes_from_db()
diff --git a/test/python/conftest.py b/test/python/conftest.py
index d4649c24929b91835b6aa3dd7d34f985904ceed6..f43f09d074c7ab56ce3837872ff2539ba3f18f86 100644 (file)
@@ -299,7 +299,7 @@ def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
 
 
 @pytest.fixture
-def tokenizer_mock(monkeypatch, property_table, temp_db_conn):
+def tokenizer_mock(monkeypatch, property_table, temp_db_conn, dsn):
     """ Sets up the configuration so that the test dummy tokenizer will be
         loaded.
     """
diff --git a/test/python/dummy_tokenizer.py b/test/python/dummy_tokenizer.py
index 013016c8d0ba7c2e84bddab61bfb3428944a205a..ceea4a7ededdaa1e829e6ebf902ef42455e33e47 100644 (file)
@@ -43,6 +43,9 @@ class DummyNameAnalyzer:
         """
         pass
 
+    def add_postcodes_from_db(self):
+        pass
+
     def process_place(self, place):
         """ Determine tokenizer information about the given place.
 
diff --git a/test/python/test_indexing.py b/test/python/test_indexing.py
index d68769064af9d6ff626ac1e78b9bcd684b9c6c51..ff84e37964b660a33be2faa0908ba2ad0c1519a7 100644 (file)
@@ -33,6 +33,9 @@ class IndexerTestDB:
                                                 geometry_sector INTEGER)""")
             cur.execute("""CREATE TABLE location_property_osmline (
                                place_id BIGINT,
+                               osm_id BIGINT,
+                               address HSTORE,
+                               token_info JSONB,
                                indexed_status SMALLINT,
                                indexed_date TIMESTAMP,
                                geometry_sector INTEGER)""")
@@ -61,6 +64,14 @@ class IndexerTestDB:
                            END;
                            $$ LANGUAGE plpgsql STABLE;
                         """)
+            cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
+                           RETURNS HSTORE AS $$
+                           BEGIN
+                             RETURN in_address;
+                           END;
+                           $$ LANGUAGE plpgsql STABLE;
+                        """)
+
             for table in ('placex', 'location_property_osmline', 'location_postcode'):
                 cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
                                FOR EACH ROW EXECUTE PROCEDURE date_update()
@@ -91,9 +102,9 @@ class IndexerTestDB:
         next_id = next(self.osmline_id)
         with self.conn.cursor() as cur:
             cur.execute("""INSERT INTO location_property_osmline
-                              (place_id, indexed_status, geometry_sector)
-                              VALUES (%s, 1, %s)""",
-                        (next_id, sector))
+                              (place_id, osm_id, indexed_status, geometry_sector)
+                              VALUES (%s, %s, 1, %s)""",
+                        (next_id, next_id, sector))
         return next_id
 
     def add_postcode(self, country, postcode):
diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py
index 1fc060b0c6439677e592aa90d4d48d3db51a8ea6..37b47dfa680258e5587fa40e5057d7fd85904a96 100644 (file)
@@ -5,6 +5,11 @@ Tests for functions to maintain the artificial postcode table.
 import pytest
 
 from nominatim.tools import postcodes
+import dummy_tokenizer
+
+@pytest.fixture
+def tokenizer():
+    return dummy_tokenizer.DummyTokenizer(None, None)
 
 @pytest.fixture
 def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
@@ -20,26 +25,26 @@ def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
                       postcode TEXT,
                       geometry GEOMETRY(Geometry, 4326)""")
     temp_db_cursor.execute('CREATE SEQUENCE seq_place')
-    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
-                              RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
+                              RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
                            """)
 
 
-def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
-    postcodes.import_postcodes(dsn, tmp_path)
+def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
+    postcodes.import_postcodes(dsn, tmp_path, tokenizer)
 
     assert temp_db_cursor.table_exists('gb_postcode')
     assert temp_db_cursor.table_exists('us_postcode')
     assert temp_db_cursor.table_rows('location_postcode') == 0
 
 
-def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
+def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
     temp_db_cursor.execute("""
         INSERT INTO placex (place_id, country_code, address, geometry)
           VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
     """)
 
-    postcodes.import_postcodes(dsn, tmp_path)
+    postcodes.import_postcodes(dsn, tmp_path, tokenizer)
 
     rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
                                       ST_X(geometry), ST_Y(geometry)