CREATE TABLE public.country_name (
country_code character varying(2),
name public.hstore,
+ derived_name public.hstore,
country_default_language_code text,
partition integer
);
DECLARE
place addressdata_place;
location RECORD;
+ country RECORD;
current_rank_address INTEGER;
location_isaddress BOOLEAN;
BEGIN
WHERE place_id = place.place_id
LOOP
--RAISE WARNING '%',location;
+ -- mix in default names for countries
+ IF location.rank_address = 4 and place.country_code is not NULL THEN
+ FOR country IN
+ SELECT coalesce(name, ''::hstore) as name FROM country_name
+ WHERE country_code = place.country_code LIMIT 1
+ LOOP
+ place.name := country.name || place.name;
+ END LOOP;
+ END IF;
+
IF location.rank_address < 4 THEN
-- no country locations for ranks higher than country
place.country_code := NULL::varchar(2);
-- If no country was included yet, add the name information from country_name.
IF current_rank_address > 4 THEN
FOR location IN
- SELECT name FROM country_name WHERE country_code = place.country_code LIMIT 1
+ SELECT name || coalesce(derived_name, ''::hstore) as name FROM country_name
+ WHERE country_code = place.country_code LIMIT 1
LOOP
--RAISE WARNING '% % %',current_rank_address,searchcountrycode,countryname;
RETURN NEXT ROW(null, null, null, location.name, 'place', 'country', NULL,
-- ---- All other place types.
- -- Patch in additional country names
- IF NEW.admin_level = 2 and NEW.type = 'administrative' and NEW.address ? 'country'
- THEN
- FOR country IN
- SELECT name FROM country_name WHERE country_code = lower(NEW.address->'country')
- LOOP
- NEW.name = country.name || NEW.name;
- END LOOP;
- END IF;
-
-- When an area is changed from large to small: log and discard change
IF existing.geometry is not null AND ST_IsValid(existing.geometry)
AND ST_Area(existing.geometry) > 0.02
AND NEW.class = 'boundary' AND NEW.type = 'administrative'
AND NEW.country_code IS NOT NULL AND NEW.osm_type = 'R'
THEN
- -- Update the list of country names. Adding an additional sanity
- -- check here: make sure the country does overlap with the area where
- -- we expect it to be as per static country grid.
+ -- Update the list of country names.
+ -- Only take the name from the largest area for the given country code
+ -- in the hope that this is the authoritive one.
+ -- Also replace any old names so that all mapping mistakes can
+ -- be fixed through regular OSM updates.
FOR location IN
- SELECT country_code FROM country_osm_grid
- WHERE ST_Covers(geometry, NEW.centroid) and country_code = NEW.country_code
+ SELECT osm_id FROM placex
+ WHERE rank_search = 4 and osm_type = 'R'
+ and country_code = NEW.country_code
+ ORDER BY ST_Area(geometry) desc
LIMIT 1
LOOP
- {% if debug %}RAISE WARNING 'Updating names for country '%' with: %', NEW.country_code, NEW.name;{% endif %}
- UPDATE country_name SET name = name || NEW.name WHERE country_code = NEW.country_code;
+ IF location.osm_id = NEW.osm_id THEN
+ {% if debug %}RAISE WARNING 'Updating names for country '%' with: %', NEW.country_code, NEW.name;{% endif %}
+ UPDATE country_name SET derived_name = NEW.name WHERE country_code = NEW.country_code;
+ END IF;
END LOOP;
END IF;
def add_country_names(self, country_code, names):
- """ Add names for the given country to the search index.
+ """ Add default names for the given country to the search index.
"""
# Make sure any name preprocessing for country names applies.
info = PlaceInfo({'name': names, 'country_code': country_code,
'rank_address': 4, 'class': 'boundary',
'type': 'administrative'})
self._add_country_full_names(country_code,
- self.sanitizer.process_names(info)[0])
+ self.sanitizer.process_names(info)[0],
+ internal=True)
- def _add_country_full_names(self, country_code, names):
+ def _add_country_full_names(self, country_code, names, internal=False):
""" Add names for the given country from an already sanitized
name list.
"""
with self.conn.cursor() as cur:
# Get existing names
- cur.execute("""SELECT word_token FROM word
- WHERE type = 'C' and word = %s""",
+ cur.execute("""SELECT word_token, coalesce(info ? 'internal', false) as is_internal
+ FROM word
+ WHERE type = 'C' and word = %s""",
(country_code, ))
- word_tokens.difference_update((t[0] for t in cur))
+ existing_tokens = {True: set(), False: set()} # internal/external names
+ for word in cur:
+ existing_tokens[word[1]].add(word[0])
+
+ # Delete names that no longer exist.
+ gone_tokens = existing_tokens[internal] - word_tokens
+ if internal:
+ gone_tokens.update(existing_tokens[False] & word_tokens)
+ if gone_tokens:
+ cur.execute("""DELETE FROM word
+ USING unnest(%s) as token
+ WHERE type = 'C' and word = %s
+ and word_token = token""",
+ (list(gone_tokens), country_code))
# Only add those names that are not yet in the list.
- if word_tokens:
- cur.execute("""INSERT INTO word (word_token, type, word)
- (SELECT token, 'C', %s
- FROM unnest(%s) as token)
- """, (country_code, list(word_tokens)))
-
- # No names are deleted at the moment.
- # If deletion is made possible, then the static names from the
- # initial 'country_name' table should be kept.
+ new_tokens = word_tokens - existing_tokens[True]
+ if not internal:
+ new_tokens -= existing_tokens[False]
+ if new_tokens:
+ if internal:
+ sql = """INSERT INTO word (word_token, type, word, info)
+ (SELECT token, 'C', %s, '{"internal": "yes"}'
+ FROM unnest(%s) as token)
+ """
+ else:
+ sql = """INSERT INTO word (word_token, type, word)
+ (SELECT token, 'C', %s
+ FROM unnest(%s) as token)
+ """
+ cur.execute(sql, (country_code, list(new_tokens)))
def process_place(self, place):
SET step = CASE WHEN interpolationtype = 'all'
THEN 1 ELSE 2 END
""")
+
+
+@_migration(4, 0, 99, 4)
+def add_derived_name_column_for_country_names(conn, **_):
+ """ Add a new column 'derived_name' which in the future takes the
+ country names as imported from OSM data.
+ """
+ with conn.cursor() as cur:
+ cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
# patch level when cherry-picking the commit with the migration.
#
# Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (4, 0, 99, 4)
+NOMINATIM_VERSION = (4, 0, 99, 5)
POSTGRESQL_REQUIRED_VERSION = (9, 5)
POSTGIS_REQUIRED_VERSION = (2, 2)
When importing
Then placex has no entry for R1
- Scenario: search and address ranks for GB post codes correctly assigned
- Given the places
- | osm | class | type | postcode | geometry |
- | N1 | place | postcode | E45 2CD | country:gb |
- | N2 | place | postcode | E45 2 | country:gb |
- | N3 | place | postcode | Y45 | country:gb |
- When importing
- Then placex contains
- | object | addr+postcode | country_code | rank_search | rank_address |
- | N1 | E45 2CD | gb | 25 | 5 |
- | N2 | E45 2 | gb | 23 | 5 |
- | N3 | Y45 | gb | 21 | 5 |
-
- Scenario: wrongly formatted GB postcodes are down-ranked
- Given the places
- | osm | class | type | postcode | geometry |
- | N1 | place | postcode | EA452CD | country:gb |
- | N2 | place | postcode | E45 23 | country:gb |
- When importing
- Then placex contains
- | object | country_code | rank_search | rank_address |
- | N1 | gb | 30 | 30 |
- | N2 | gb | 30 | 30 |
-
- Scenario: search and address rank for DE postcodes correctly assigned
- Given the places
- | osm | class | type | postcode | geometry |
- | N1 | place | postcode | 56427 | country:de |
- | N2 | place | postcode | 5642 | country:de |
- | N3 | place | postcode | 5642A | country:de |
- | N4 | place | postcode | 564276 | country:de |
- When importing
- Then placex contains
- | object | country_code | rank_search | rank_address |
- | N1 | de | 21 | 11 |
- | N2 | de | 30 | 30 |
- | N3 | de | 30 | 30 |
- | N4 | de | 30 | 30 |
-
- Scenario: search and address rank for other postcodes are correctly assigned
- Given the places
- | osm | class | type | postcode | geometry |
- | N1 | place | postcode | 1 | country:ca |
- | N2 | place | postcode | X3 | country:ca |
- | N3 | place | postcode | 543 | country:ca |
- | N4 | place | postcode | 54dc | country:ca |
- | N5 | place | postcode | 12345 | country:ca |
- | N6 | place | postcode | 55TT667 | country:ca |
- | N7 | place | postcode | 123-65 | country:ca |
- | N8 | place | postcode | 12 445 4 | country:ca |
- | N9 | place | postcode | A1:bc10 | country:ca |
- When importing
- Then placex contains
- | object | country_code | rank_search | rank_address |
- | N1 | ca | 21 | 11 |
- | N2 | ca | 21 | 11 |
- | N3 | ca | 21 | 11 |
- | N4 | ca | 21 | 11 |
- | N5 | ca | 21 | 11 |
- | N6 | ca | 21 | 11 |
- | N7 | ca | 25 | 11 |
- | N8 | ca | 25 | 11 |
- | N9 | ca | 25 | 11 |
-
Scenario: search and address ranks for boundaries are correctly assigned
Given the named places
| osm | class | type |
When sending search query "E4 7EA"
Then results contain
| type | display_name |
- | postcode | E4 7EA |
+ | postcode | E4 7EA |
+
+ Scenario: search and address ranks for GB post codes correctly assigned
+ Given the places
+ | osm | class | type | postcode | geometry |
+ | N1 | place | postcode | E45 2CD | country:gb |
+ | N2 | place | postcode | E45 2 | country:gb |
+ | N3 | place | postcode | Y45 | country:gb |
+ When importing
+ Then location_postcode contains exactly
+ | postcode | country | rank_search | rank_address |
+ | E45 2CD | gb | 25 | 5 |
+ | E45 2 | gb | 23 | 5 |
+ | Y45 | gb | 21 | 5 |
+
+ Scenario: wrongly formatted GB postcodes are down-ranked
+ Given the places
+ | osm | class | type | postcode | geometry |
+ | N1 | place | postcode | EA452CD | country:gb |
+ | N2 | place | postcode | E45 23 | country:gb |
+ When importing
+ Then location_postcode contains exactly
+ | postcode | country | rank_search | rank_address |
+ | EA452CD | gb | 30 | 30 |
+ | E45 23 | gb | 30 | 30 |
+
+ Scenario: search and address rank for DE postcodes correctly assigned
+ Given the places
+ | osm | class | type | postcode | geometry |
+ | N1 | place | postcode | 56427 | country:de |
+ | N2 | place | postcode | 5642 | country:de |
+ | N3 | place | postcode | 5642A | country:de |
+ | N4 | place | postcode | 564276 | country:de |
+ When importing
+ Then location_postcode contains exactly
+ | postcode | country | rank_search | rank_address |
+ | 56427 | de | 21 | 11 |
+ | 5642 | de | 30 | 30 |
+ | 5642A | de | 30 | 30 |
+ | 564276 | de | 30 | 30 |
+
+ Scenario: search and address rank for other postcodes are correctly assigned
+ Given the places
+ | osm | class | type | postcode | geometry |
+ | N1 | place | postcode | 1 | country:ca |
+ | N2 | place | postcode | X3 | country:ca |
+ | N3 | place | postcode | 543 | country:ca |
+ | N4 | place | postcode | 54dc | country:ca |
+ | N5 | place | postcode | 12345 | country:ca |
+ | N6 | place | postcode | 55TT667 | country:ca |
+ | N7 | place | postcode | 123-65 | country:ca |
+ | N8 | place | postcode | 12 445 4 | country:ca |
+ | N9 | place | postcode | A1:bc10 | country:ca |
+ When importing
+ Then location_postcode contains exactly
+ | postcode | country | rank_search | rank_address |
+ | 1 | ca | 21 | 11 |
+ | X3 | ca | 21 | 11 |
+ | 543 | ca | 21 | 11 |
+ | 54DC | ca | 21 | 11 |
+ | 12345 | ca | 21 | 11 |
+ | 55TT667 | ca | 21 | 11 |
+ | 123-65 | ca | 25 | 11 |
+ | 12 445 4 | ca | 25 | 11 |
+ | A1:BC10 | ca | 25 | 11 |
+
+
| N18 | place | city | 0 0 |
| N19 | place | island | 0 0 |
| N36 | place | house | 0 0 |
- | N38 | place | houses | 0 0 |
And the named places
| osm | class | type | extra+capital | geometry |
| N101 | place | city | yes | 0 0 |
| N19 | 17 | 0 |
| N101 | 15 | 16 |
| N36 | 30 | 30 |
- | N38 | 28 | 0 |
Scenario: Ranks for boundaries are assigned according to admin level
Given the named places
Scenario: Unprintable characters in postcodes are ignored
Given the named places
- | osm | class | type | address |
- | N234 | amenity | prison | 'postcode' : u'1234\u200e' |
+ | osm | class | type | address | geometry |
+ | N234 | amenity | prison | 'postcode' : u'1234\u200e' | country:de |
When importing
And sending search query "1234"
Then result 0 has not attributes osm_type
| osm | class | type | postcode | geometry |
| R1 | boundary | postal_code | 54321 | poly-area:1.0 |
And sending search query "12345"
- Then result 0 has not attributes osm_type
+ Then exactly 0 results are returned
When sending search query "54321"
Then results contain
| ID | osm |
--- /dev/null
+@DB
+Feature: Country handling
+ Tests for update of country information
+
+ @fail-legacy
+ Scenario: When country names are changed old ones are no longer searchable
+ Given the places
+ | osm | class | type | admin | name+name:xy | country | geometry |
+ | R1 | boundary | administrative | 2 | Loudou | de | (9 52, 9 53, 10 52, 9 52) |
+ Given the places
+ | osm | class | type | name | geometry |
+ | N1 | place | town | Wenig | country:de |
+ When importing
+ When sending search query "Wenig, Loudou"
+ Then results contain
+ | osm |
+ | N1 |
+ When updating places
+ | osm | class | type | admin | name+name:xy | country | geometry |
+ | R1 | boundary | administrative | 2 | Germany | de | (9 52, 9 53, 10 52, 9 52) |
+ When sending search query "Wenig, Loudou"
+ Then exactly 0 results are returned
+
+ @fail-legacy
+ Scenario: When country names are deleted they are no longer searchable
+ Given the places
+ | osm | class | type | admin | name+name:xy | country | geometry |
+ | R1 | boundary | administrative | 2 | Loudou | de | (9 52, 9 53, 10 52, 9 52) |
+ Given the places
+ | osm | class | type | name | geometry |
+ | N1 | place | town | Wenig | country:de |
+ When importing
+ When sending search query "Wenig, Loudou"
+ Then results contain
+ | osm |
+ | N1 |
+ When updating places
+ | osm | class | type | admin | name+name:en | country | geometry |
+ | R1 | boundary | administrative | 2 | Germany | de | (9 52, 9 53, 10 52, 9 52) |
+ When sending search query "Wenig, Loudou"
+ Then exactly 0 results are returned
+ When sending search query "Wenig"
+ | accept-language |
+ | xy,en |
+ Then results contain
+ | osm | display_name |
+ | N1 | Wenig, Germany |
+
+
+ Scenario: Default country names are always searchable
+ Given the places
+ | osm | class | type | name | geometry |
+ | N1 | place | town | Wenig | country:de |
+ When importing
+ When sending search query "Wenig, Germany"
+ Then results contain
+ | osm |
+ | N1 |
+ When updating places
+ | osm | class | type | admin | name+name:en | country | geometry |
+ | R1 | boundary | administrative | 2 | Lilly | de | (9 52, 9 53, 10 52, 9 52) |
+ When sending search query "Wenig, Germany"
+ | accept-language |
+ | en,de |
+ Then results contain
+ | osm | display_name |
+ | N1 | Wenig, Lilly |
+
+
+ @fail-legacy
+ Scenario: When a localised name is deleted, the standard name takes over
+ Given the places
+ | osm | class | type | admin | name+name:de | country | geometry |
+ | R1 | boundary | administrative | 2 | Loudou | de | (9 52, 9 53, 10 52, 9 52) |
+ Given the places
+ | osm | class | type | name | geometry |
+ | N1 | place | town | Wenig | country:de |
+ When importing
+ When sending search query "Wenig, Loudou"
+ | accept-language |
+ | de,en |
+ Then results contain
+ | osm | display_name |
+ | N1 | Wenig, Loudou |
+ When updating places
+ | osm | class | type | admin | name+name:en | country | geometry |
+ | R1 | boundary | administrative | 2 | Germany | de | (9 52, 9 53, 10 52, 9 52) |
+ When sending search query "Wenig, Loudou"
+ Then exactly 0 results are returned
+ When sending search query "Wenig"
+ | accept-language |
+ | de,en |
+ Then results contain
+ | osm | display_name |
+ | N1 | Wenig, Deutschland |
+
| osm | class | type | postcode | geometry |
| N3 | place | postcode | 12345 | 1 -1 |
When importing
- Then placex contains
- | object | class | type |
- | N3 | place | postcode |
+ Then placex has no entry for N3
When updating places
| osm | class | type | postcode | housenr | geometry |
| N3 | place | house | 12345 | 13 | 1 -1 |
Then place contains
| object | addr+country | name |
| R1 | XX | 'name' : 'Foo' |
-
- Scenario: Country boundary names are extended when country_code known
- When loading osm data
- """
- n200 Tamenity=prison x0 y0
- n201 x0 y0.0001
- n202 x0.0001 y0.0001
- n203 x0.0001 y0
- """
- And updating osm data
- """
- w1 Nn200,n201,n202,n203,n200
- r1 Ttype=boundary,boundary=administrative,name=Foo,country_code=ch,admin_level=2 Mw1@
- """
- Then place contains
- | object | addr+country | name+name:de | name+name |
- | R1 | ch | Schweiz | Foo |
-
def import_and_index_data_from_place_table(context):
""" Import data previously set up in the place table.
"""
- nctx = context.nominatim
-
- tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
- context.nominatim.copy_from_place(context.db)
-
- # XXX use tool function as soon as it is ported
- with context.db.cursor() as cur:
- with (context.nominatim.src_dir / 'lib-sql' / 'postcode_tables.sql').open('r') as fd:
- cur.execute(fd.read())
- cur.execute("""
- INSERT INTO location_postcode
- (place_id, indexed_status, country_code, postcode, geometry)
- SELECT nextval('seq_place'), 1, country_code,
- upper(trim (both ' ' from address->'postcode')) as pc,
- ST_Centroid(ST_Collect(ST_Centroid(geometry)))
- FROM placex
- WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
- AND geometry IS NOT null
- GROUP BY country_code, pc""")
-
- # Call directly as the refresh function does not include postcodes.
- indexer.LOG.setLevel(logging.ERROR)
- indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
+ context.nominatim.run_nominatim('import', '--continue', 'load-data',
+ '--index-noanalyse', '-q')
check_database_integrity(context)
for row in context.table:
db_row = results.get((row['country'],row['postcode']))
assert db_row is not None, \
- "Missing row for country '{r['country']}' postcode '{r['postcode']}'.".format(r=row)
+ f"Missing row for country '{row['country']}' postcode '{row['postcode']}'."
db_row.assert_row(row, ('country', 'postcode'))
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
for row in context.table:
pid = NominatimID(row['object']).get_place_id(cur)
- apid = NominatimID(row['address']).get_place_id(cur)
- cur.execute(""" SELECT * FROM place_addressline
- WHERE place_id = %s AND address_place_id = %s""",
- (pid, apid))
- assert cur.rowcount == 0, \
- "Row found for place %s and address %s" % (row['object'], row['address'])
+ apid = NominatimID(row['address']).get_place_id(cur, allow_empty=True)
+ if apid is not None:
+ cur.execute(""" SELECT * FROM place_addressline
+ WHERE place_id = %s AND address_place_id = %s""",
+ (pid, apid))
+ assert cur.rowcount == 0, \
+ "Row found for place %s and address %s" % (row['object'], row['address'])
@then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
def check_location_property_osmline(context, oid, neg):
','.join(['*'] + (extra_columns or [])), table)
cur.execute(query, (pid, ))
- def get_place_id(self, cur):
+ def get_place_id(self, cur, allow_empty=False):
""" Look up the place id for the ID. Throws an assertion if the ID
is not unique.
"""
self.query_osm_id(cur, "SELECT place_id FROM placex WHERE {}")
+ if cur.rowcount == 0 and allow_empty:
+ return None
+
assert cur.rowcount == 1, \
"Place ID {!s} not unique. Found {} entries.".format(self, cur.rowcount)