+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
import logging
from itertools import chain
def import_and_index_data_from_place_table(context):
""" Import data previously set up in the place table.
"""
- nctx = context.nominatim
-
- tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
- context.nominatim.copy_from_place(context.db)
-
- # XXX use tool function as soon as it is ported
- with context.db.cursor() as cur:
- with (context.nominatim.src_dir / 'lib-sql' / 'postcode_tables.sql').open('r') as fd:
- cur.execute(fd.read())
- cur.execute("""
- INSERT INTO location_postcode
- (place_id, indexed_status, country_code, postcode, geometry)
- SELECT nextval('seq_place'), 1, country_code,
- upper(trim (both ' ' from address->'postcode')) as pc,
- ST_Centroid(ST_Collect(ST_Centroid(geometry)))
- FROM placex
- WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
- AND geometry IS NOT null
- GROUP BY country_code, pc""")
-
- # Call directly as the refresh function does not include postcodes.
- indexer.LOG.setLevel(logging.ERROR)
- indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
+ # Refresh the SQL functions first, then let the 'import' command continue
+ # from its 'load-data' step. Going through run_nominatim replaces filling
+ # location_postcode and driving the indexer by hand in this step.
+ context.nominatim.run_nominatim('refresh', '--functions')
+ context.nominatim.run_nominatim('import', '--continue', 'load-data',
+ '--index-noanalyse', '-q')
check_database_integrity(context)
for name, value in zip(row.headings, row.cells):
if name in ('name_vector', 'nameaddress_vector'):
items = [x.strip() for x in value.split(',')]
- tokens = analyzer.get_word_token_info(context.db, items)
+ tokens = analyzer.get_word_token_info(items)
if not exclude:
assert len(tokens) >= len(items), \
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
assert cur.rowcount == len(list(context.table)), \
- "Postcode table has {} rows, expected {}.".foramt(cur.rowcount, len(list(context.table)))
+ "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
results = {}
for row in cur:
for row in context.table:
db_row = results.get((row['country'],row['postcode']))
assert db_row is not None, \
- "Missing row for country '{r['country']}' postcode '{r['postcode']}'.".format(r=row)
+ f"Missing row for country '{row['country']}' postcode '{row['postcode']}'."
db_row.assert_row(row, ('country', 'postcode'))
-@then("word contains(?P<exclude> not)?")
-def check_word_table(context, exclude):
- """ Check the contents of the word table. Each row represents a table row
- and all data must match. Data not present in the expected table, may
- be arbitry. The rows are identified via all given columns.
+@then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
+def check_word_table_for_postcodes(context, exclude, postcodes):
+ """ Check that the tokenizer produces postcode tokens for the given
+ postcodes. The postcodes are a comma-separated list of postcodes.
+ Whitespace matters.
+
+ When the step is phrased with 'no' (`exclude` is set), none of the
+ postcodes may appear in the word table. Otherwise each of them must
+ appear there exactly once.
"""
+ nctx = context.nominatim
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
+ # Compare against the analyzer-normalized form of each postcode. The
+ # input is split on ',' only; nothing is stripped here.
+ with tokenizer.name_analyzer() as ana:
+ plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
+
+ plist.sort()
+
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
- for row in context.table:
- wheres = ' AND '.join(["{} = %s".format(h) for h in row.headings])
- cur.execute("SELECT * from word WHERE " + wheres, list(row.cells))
- if exclude:
- assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values)
- else:
- assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values)
+ # The word table is queried differently per tokenizer: rows of
+ # type 'P' for 'icu', rows with class 'place' and type 'postcode'
+ # for any other tokenizer.
+ if nctx.tokenizer == 'icu':
+ cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
+ (plist,))
+ else:
+ cur.execute("""SELECT word FROM word WHERE word = any(%s)
+ and class = 'place' and type = 'postcode'""",
+ (plist,))
+
+ found = [row[0] for row in cur]
+ # A postcode must never have more than one matching row.
+ assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
+
+ if exclude:
+ assert len(found) == 0, f"Unexpected postcodes: {found}"
+ else:
+ # The query only returns words taken from plist, so set equality
+ # means every requested postcode was found.
+ assert set(found) == set(plist), \
+ f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
@then("place_addressline contains")
def check_place_addressline(context):
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
for row in context.table:
pid = NominatimID(row['object']).get_place_id(cur)
- apid = NominatimID(row['address']).get_place_id(cur)
- cur.execute(""" SELECT * FROM place_addressline
- WHERE place_id = %s AND address_place_id = %s""",
- (pid, apid))
- assert cur.rowcount == 0, \
- "Row found for place %s and address %s" % (row['object'], row['address'])
+ # NOTE(review): the step text says "contains", but the assertion below
+ # requires that NO matching row exists - confirm that this body
+ # belongs to this step definition.
+ # allow_empty=True presumably makes get_place_id return None instead
+ # of failing when the address object has no place id - confirm in
+ # NominatimID. In that case there is nothing to look up, so the row
+ # is skipped.
+ apid = NominatimID(row['address']).get_place_id(cur, allow_empty=True)
+ if apid is not None:
+ cur.execute(""" SELECT * FROM place_addressline
+ WHERE place_id = %s AND address_place_id = %s""",
+ (pid, apid))
+ assert cur.rowcount == 0, \
+ "Row found for place %s and address %s" % (row['object'], row['address'])
@then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
def check_location_property_osmline(context, oid, neg):