also check out the osm2pgsql subproject:
```
-git clone --recursive git://github.com/openstreetmap/Nominatim.git
+git clone --recursive https://github.com/openstreetmap/Nominatim.git
```
The development version does not include the country grid. Download it separately:
The token-analysis section contains the list of configured analyzers. Each
analyzer must have an `id` parameter that uniquely identifies the analyzer.
The only exception is the default analyzer that is used when no special
-analyzer was selected.
+analyzer was selected. There is one special id, '@housenumber': if an analyzer
+with that id is present, it is used to normalize house numbers.
Different analyzer implementations may exist. To select the implementation,
-the `analyzer` parameter must be set. Currently there is only one implementation
-`generic` which is described in the following.
+the `analyzer` parameter must be set. The available implementations are
+described below.
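+
+For illustration, a token-analysis section might look as follows (the ids
+apart from '@housenumber' are freely chosen):
+
+```
+token-analysis:
+    - analyzer: generic
+    - id: "@housenumber"
+      analyzer: housenumbers
+    - id: bg
+      analyzer: generic
+      mode: variant-only
+```
+
+The first entry has no `id` and therefore serves as the default analyzer.
+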
##### Generic token analyzer
-The generic analyzer is able to create variants from a list of given
+The generic analyzer `generic` is able to create variants from a list of given
abbreviation and decomposition replacements and introduce spelling variations.
###### Variants
to the analyser configuration.
+##### Housenumber token analyzer
+
+The analyzer `housenumbers` is purpose-made to analyze house numbers. It
+creates variants with optional spaces between numbers and letters. Thus,
+house numbers of the form '3 a', '3A', '3-A' etc. are all considered equivalent.
+
+The analyzer cannot be customized.
+
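+As a rough sketch of the behaviour (assuming PyICU is installed, substituting
+made-up normalization rules for the configured ones, and taking the module
+path of the new analyzer as given):
+
+```
+from icu import Transliterator
+
+from nominatim.tokenizer.token_analysis.housenumbers import HousenumberTokenAnalysis
+
+# Stand-ins for the ICU rules that normally come from the tokenizer config.
+norm = Transliterator.createFromRules(
+    "norm", ":: lower(); [[:Punctuation:][:Space:]]+ > ' ';")
+trans = Transliterator.createFromRules("trans", ":: Latin-ASCII;")
+
+analyzer = HousenumberTokenAnalysis(norm, trans)
+
+# All three forms normalize to the same string with an optional-space marker.
+for hnr in ('3 a', '3A', '3-A'):
+    assert analyzer.normalize(hnr) == '3␣a'
+
+# The marker then expands into the spaced and unspaced variants.
+assert set(analyzer.get_variants_ascii('3␣a')) == {'3 a', '3a'}
+```
+
+Per convention, the first variant returned becomes the lookup name that is
+saved in the housenumber field of the place.
+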
### Reconfiguration
Changing the configuration after the import is currently not possible, although
{
Debug::newFunction('lookupInterpolation');
$sSQL = 'SELECT place_id, parent_place_id, 30 as rank_search,';
- $sSQL .= ' (endnumber - startnumber) * ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fhnr,';
+ $sSQL .= ' (CASE WHEN endnumber != startnumber';
+ $sSQL .= ' THEN (endnumber - startnumber) * ST_LineLocatePoint(linegeo,'.$sPointSQL.')';
+ $sSQL .= ' ELSE startnumber END) as fhnr,';
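+        // The CASE catches single-number interpolations (endnumber == startnumber),
+        // where the position along the line carries no information.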
$sSQL .= ' startnumber, endnumber, step,';
$sSQL .= ' ST_Distance(linegeo,'.$sPointSQL.') as distance';
$sSQL .= ' FROM location_property_osmline';
$sSQL = 'SELECT word_id, word_token, type, word,';
$sSQL .= " info->>'op' as operator,";
$sSQL .= " info->>'class' as class, info->>'type' as ctype,";
- $sSQL .= " info->>'count' as count";
+ $sSQL .= " info->>'count' as count,";
+ $sSQL .= " info->>'lookup' as lookup";
$sSQL .= ' FROM word WHERE word_token in (';
$sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
}
break;
case 'H': // house number tokens
- $oValidTokens->addToken($sTok, new Token\HouseNumber($iId, $aWord['word_token']));
+ $sLookup = $aWord['lookup'] ?? $aWord['word_token'];
+ $oValidTokens->addToken($sTok, new Token\HouseNumber($iId, $sLookup));
break;
case 'P': // postcode tokens
// Postcodes are not normalized, so they may have content
WHERE s.place_id = parent_place_id;
FOR addr_item IN
- SELECT (get_addr_tag_rank(key, country)).*, key,
+ SELECT ranks.*, key,
token_get_address_search_tokens(token_info, key) as search_tokens
- FROM token_get_address_keys(token_info) as key
+ FROM token_get_address_keys(token_info) as key,
+ LATERAL get_addr_tag_rank(key, country) as ranks
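+        -- The LATERAL join evaluates get_addr_tag_rank() only once per key;
+        -- the previous (function()).* form expands to one call per result column.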
WHERE not token_get_address_search_tokens(token_info, key) <@ parent_address_vector
LOOP
addr_place := get_address_place(in_partition, geometry,
address_havelevel := array_fill(false, ARRAY[maxrank]);
FOR location IN
- SELECT (get_address_place(partition, geometry, from_rank, to_rank,
- extent, token_info, key)).*, key
- FROM (SELECT (get_addr_tag_rank(key, country)).*, key
- FROM token_get_address_keys(token_info) as key) x
+ SELECT apl.*, key
+ FROM (SELECT extra.*, key
+ FROM token_get_address_keys(token_info) as key,
+ LATERAL get_addr_tag_rank(key, country) as extra) x,
+ LATERAL get_address_place(partition, geometry, from_rank, to_rank,
+ extent, token_info, key) as apl
ORDER BY rank_address, distance, isguess desc
LOOP
IF location.place_id is null THEN
END;
$$
LANGUAGE plpgsql;
+
+
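+-- Return the word id for the given normalized housenumber. When the
+-- housenumber is not yet known, one word entry per lookup variant is
+-- created, all sharing the same id; the first variant is additionally
+-- stored as the 'lookup' form in the info column.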
+CREATE OR REPLACE FUNCTION create_analyzed_hnr_id(norm_term TEXT, lookup_terms TEXT[])
+ RETURNS INTEGER
+ AS $$
+DECLARE
+ return_id INTEGER;
+BEGIN
+ SELECT min(word_id) INTO return_id
+ FROM word WHERE word = norm_term and type = 'H';
+
+ IF return_id IS NULL THEN
+ return_id := nextval('seq_word');
+ INSERT INTO word (word_id, word_token, type, word, info)
+ SELECT return_id, lookup_term, 'H', norm_term,
+ json_build_object('lookup', lookup_terms[1])
+ FROM unnest(lookup_terms) as lookup_term;
+ END IF;
+
+ RETURN return_id;
+END;
+$$
+LANGUAGE plpgsql;
CREATE INDEX idx_word_full_word ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'W';
+-- Used when inserting analyzed housenumbers (exclude old-style entries).
+CREATE INDEX idx_word_housenumbers ON word
+ USING btree(word) {{db.tablespace.address_index}}
+ WHERE type = 'H' and word is not null;
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
@staticmethod
def get_place_details(worker, ids):
- worker.perform("""SELECT place_id, (placex_indexing_prepare(placex)).*
- FROM placex WHERE place_id IN %s""",
+ worker.perform("""SELECT place_id, extra.*
+ FROM placex, LATERAL placex_indexing_prepare(placex) as extra
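+                          -- LATERAL: run placex_indexing_prepare() once per row,
+                          -- not once per selected column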
+ WHERE place_id IN %s""",
(tuple((p[0] for p in ids)), ))
self.search = Transliterator.createFromRules("icu_search",
norm_rules + trans_rules)
- self.analysis = {name: arules.create(self.to_ascii, arules.config)
+ self.analysis = {name: arules.create(self.normalizer, self.to_ascii, arules.config)
for name, arules in analysis_rules.items()}
+
+
+ def get_analyzer(self, name):
+ """ Return the given named analyzer. If no analyzer with that
+ name exists, return the default analyzer.
+ """
+ return self.analysis.get(name) or self.analysis[None]
if not conn.table_exists('search_name'):
return
with conn.cursor(name="hnr_counter") as cur:
- cur.execute("""SELECT word_id, word_token FROM word
+ cur.execute("""SELECT DISTINCT word_id, coalesce(info->>'lookup', word_token)
+ FROM word
WHERE type = 'H'
AND NOT EXISTS(SELECT * FROM search_name
WHERE ARRAY[word.word_id] && name_vector)
- AND (char_length(word_token) > 6
- OR word_token not similar to '\\d+')
+ AND (char_length(coalesce(word, word_token)) > 6
+ OR coalesce(word, word_token) not similar to '\\d+')
""")
candidates = {token: wid for wid, token in cur}
with conn.cursor(name="hnr_counter") as cur:
for hnr in row[0].split(';'):
candidates.pop(hnr, None)
LOG.info("There are %s outdated housenumbers.", len(candidates))
+ LOG.debug("Outdated housenumbers: %s", candidates.keys())
if candidates:
with conn.cursor() as cur:
cur.execute("""DELETE FROM word WHERE word_id = any(%s)""",
return postcode.strip().upper()
- def _make_standard_hnr(self, hnr):
- """ Create a normalised version of a housenumber.
-
- This function takes minor shortcuts on transliteration.
- """
- return self._search_normalized(hnr)
-
def update_postcodes_from_db(self):
""" Update postcode tokens in the word table from the location_postcode
table.
Returns a JSON-serializable structure that will be handed into
the database via the token_info field.
"""
- token_info = _TokenInfo(self._cache)
+ token_info = _TokenInfo()
names, address = self.sanitizer.process_names(place)
if names:
- fulls, partials = self._compute_name_tokens(names)
-
- token_info.add_names(fulls, partials)
+ token_info.set_names(*self._compute_name_tokens(names))
if place.is_country():
self._add_country_full_names(place.country_code, names)
if address:
self._process_place_address(token_info, address)
- return token_info.data
+ return token_info.to_dict()
def _process_place_address(self, token_info, address):
- hnrs = set()
- addr_terms = []
- streets = []
for item in address:
if item.kind == 'postcode':
self._add_postcode(item.name)
elif item.kind == 'housenumber':
- norm_name = self._make_standard_hnr(item.name)
- if norm_name:
- hnrs.add(norm_name)
+ token_info.add_housenumber(*self._compute_housenumber_token(item))
elif item.kind == 'street':
- streets.extend(self._retrieve_full_tokens(item.name))
+ token_info.add_street(self._retrieve_full_tokens(item.name))
elif item.kind == 'place':
if not item.suffix:
token_info.add_place(self._compute_partial_tokens(item.name))
elif not item.kind.startswith('_') and not item.suffix and \
item.kind not in ('country', 'full'):
- addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
+ token_info.add_address_term(item.kind, self._compute_partial_tokens(item.name))
- if hnrs:
- token_info.add_housenumbers(self.conn, hnrs)
- if addr_terms:
- token_info.add_address_terms(addr_terms)
+ def _compute_housenumber_token(self, hnr):
+ """ Normalize the housenumber and return the word token and the
+ canonical form.
+ """
+ analyzer = self.token_analysis.analysis.get('@housenumber')
+ result = None, None
+
+ if analyzer is None:
+ # When no custom analyzer is set, simply normalize and transliterate
+ norm_name = self._search_normalized(hnr.name)
+ if norm_name:
+ result = self._cache.housenumbers.get(norm_name, result)
+ if result[0] is None:
+ with self.conn.cursor() as cur:
+ cur.execute("SELECT getorcreate_hnr_id(%s)", (norm_name, ))
+ result = cur.fetchone()[0], norm_name
+ self._cache.housenumbers[norm_name] = result
+ else:
+ # Otherwise use the analyzer to determine the canonical name.
+ # Per convention we use the first variant as the 'lookup name', the
+ # name that gets saved in the housenumber field of the place.
+ norm_name = analyzer.normalize(hnr.name)
+ if norm_name:
+ result = self._cache.housenumbers.get(norm_name, result)
+ if result[0] is None:
+ variants = analyzer.get_variants_ascii(norm_name)
+ if variants:
+ with self.conn.cursor() as cur:
+ cur.execute("SELECT create_analyzed_hnr_id(%s, %s)",
+ (norm_name, list(variants)))
+ result = cur.fetchone()[0], variants[0]
+ self._cache.housenumbers[norm_name] = result
- if streets:
- token_info.add_street(streets)
+ return result
def _compute_partial_tokens(self, name):
for name in names:
analyzer_id = name.get_attr('analyzer')
- norm_name = self._normalized(name.name)
+ analyzer = self.token_analysis.get_analyzer(analyzer_id)
+ norm_name = analyzer.normalize(name.name)
if analyzer_id is None:
token_id = norm_name
else:
full, part = self._cache.names.get(token_id, (None, None))
if full is None:
- variants = self.token_analysis.analysis[analyzer_id].get_variants_ascii(norm_name)
+ variants = analyzer.get_variants_ascii(norm_name)
if not variants:
continue
with self.conn.cursor() as cur:
- cur.execute("SELECT (getorcreate_full_word(%s, %s)).*",
+ cur.execute("SELECT * FROM getorcreate_full_word(%s, %s)",
(token_id, variants))
full, part = cur.fetchone()
class _TokenInfo:
""" Collect token information to be sent back to the database.
"""
- def __init__(self, cache):
- self._cache = cache
- self.data = {}
+ def __init__(self):
+ self.names = None
+ self.housenumbers = set()
+ self.housenumber_tokens = set()
+ self.street_tokens = set()
+ self.place_tokens = set()
+ self.address_tokens = {}
+
@staticmethod
def _mk_array(tokens):
- return '{%s}' % ','.join((str(s) for s in tokens))
+ return f"{{{','.join((str(s) for s in tokens))}}}"
+
+
+ def to_dict(self):
+ """ Return the token information in database importable format.
+ """
+ out = {}
+
+ if self.names:
+ out['names'] = self.names
+
+ if self.housenumbers:
+ out['hnr'] = ';'.join(self.housenumbers)
+ out['hnr_tokens'] = self._mk_array(self.housenumber_tokens)
+
+ if self.street_tokens:
+ out['street'] = self._mk_array(self.street_tokens)
+ if self.place_tokens:
+ out['place'] = self._mk_array(self.place_tokens)
+        if self.address_tokens:
+            out['addr'] = self.address_tokens
+
+        return out
+
+
-    def add_names(self, fulls, partials):
+    def set_names(self, fulls, partials):
""" Adds token information for the normalised names.
"""
- self.data['names'] = self._mk_array(itertools.chain(fulls, partials))
+ self.names = self._mk_array(itertools.chain(fulls, partials))
- def add_housenumbers(self, conn, hnrs):
+ def add_housenumber(self, token, hnr):
""" Extract housenumber information from a list of normalised
housenumbers.
"""
- self.data['hnr_tokens'] = self._mk_array(self._cache.get_hnr_tokens(conn, hnrs))
- self.data['hnr'] = ';'.join(hnrs)
+ if token:
+ self.housenumbers.add(hnr)
+ self.housenumber_tokens.add(token)
def add_street(self, tokens):
""" Add addr:street match terms.
"""
- self.data['street'] = self._mk_array(tokens)
+ self.street_tokens.update(tokens)
def add_place(self, tokens):
""" Add addr:place search and match terms.
"""
- if tokens:
- self.data['place'] = self._mk_array(tokens)
+ self.place_tokens.update(tokens)
- def add_address_terms(self, terms):
+ def add_address_term(self, key, partials):
""" Add additional address terms.
"""
- tokens = {key: self._mk_array(partials)
- for key, partials in terms if partials}
-
- if tokens:
- self.data['addr'] = tokens
+ if partials:
+ self.address_tokens[key] = self._mk_array(partials)
class _TokenCache:
self.fulls = {}
self.postcodes = set()
self.housenumbers = {}
-
-
- def get_hnr_tokens(self, conn, terms):
- """ Get token ids for a list of housenumbers, looking them up in the
- database if necessary. `terms` is an iterable of normalized
- housenumbers.
- """
- tokens = []
- askdb = []
-
- for term in terms:
- token = self.housenumbers.get(term)
- if token is None:
- askdb.append(term)
- else:
- tokens.append(token)
-
- if askdb:
- with conn.cursor() as cur:
- cur.execute("SELECT nr, getorcreate_hnr_id(nr) FROM unnest(%s) as nr",
- (askdb, ))
- for term, tid in cur:
- self.housenumbers[term] = tid
- tokens.append(tid)
-
- return tokens
simple_list = list(set(simple_list))
with conn.cursor() as cur:
- cur.execute("SELECT (create_housenumbers(%s)).* ", (simple_list, ))
+ cur.execute("SELECT * FROM create_housenumbers(%s)", (simple_list, ))
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
### Analysis section
-def create(transliterator, config):
+def create(normalizer, transliterator, config):
""" Create a new token analysis instance for this module.
"""
- return GenericTokenAnalysis(transliterator, config)
+ return GenericTokenAnalysis(normalizer, transliterator, config)
class GenericTokenAnalysis:
and provides the functions to apply the transformations.
"""
- def __init__(self, to_ascii, config):
+ def __init__(self, norm, to_ascii, config):
+ self.norm = norm
self.to_ascii = to_ascii
self.variant_only = config['variant_only']
self.mutations = [MutationVariantGenerator(*cfg) for cfg in config['mutations']]
+ def normalize(self, name):
+ """ Return the normalized form of the name. This is the standard form
+ from which possible variants for the name can be derived.
+ """
+ return self.norm.transliterate(name).strip()
+
+
def get_variants_ascii(self, norm_name):
""" Compute the spelling variants for the given normalized name
and transliterate the result.
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Specialized processor for housenumbers. Analyses common housenumber patterns
+and creates variants for them.
+"""
+import re
+
+from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
+
+RE_NON_DIGIT = re.compile('[^0-9]')
+RE_DIGIT_ALPHA = re.compile(r'(\d)\s*([^\d\s␣])')
+RE_ALPHA_DIGIT = re.compile(r'([^\s\d␣])\s*(\d)')
+RE_NAMED_PART = re.compile(r'[a-z]{4}')
+
+### Configuration section
+
+def configure(rules, normalization_rules): # pylint: disable=W0613
+ """ All behaviour is currently hard-coded.
+ """
+ return None
+
+### Analysis section
+
+def create(normalizer, transliterator, config): # pylint: disable=W0613
+ """ Create a new token analysis instance for this module.
+ """
+ return HousenumberTokenAnalysis(normalizer, transliterator)
+
+
+class HousenumberTokenAnalysis:
+ """ Detects common housenumber patterns and normalizes them.
+ """
+ def __init__(self, norm, trans):
+ self.norm = norm
+ self.trans = trans
+
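+        # A '␣' in the normalized form marks a position where a space may
+        # or may not appear.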
+ self.mutator = MutationVariantGenerator('␣', (' ', ''))
+
+ def normalize(self, name):
+ """ Return the normalized form of the housenumber.
+ """
+        # Shortcut for purely numeric housenumbers, which make up 90% of the data.
+ if RE_NON_DIGIT.search(name) is None:
+ return name
+
+ norm = self.trans.transliterate(self.norm.transliterate(name))
+        # Housenumbers with a significant alphabetic part are used as is.
+        if RE_NAMED_PART.search(norm) is None:
+            # Otherwise insert markers for optional spaces between digits and letters.
+ (norm_opt, cnt1) = RE_DIGIT_ALPHA.subn(r'\1␣\2', norm)
+ (norm_opt, cnt2) = RE_ALPHA_DIGIT.subn(r'\1␣\2', norm_opt)
+ # Avoid creating too many variants per number.
+ if cnt1 + cnt2 <= 4:
+ return norm_opt
+
+ return norm
+
+ def get_variants_ascii(self, norm_name):
+ """ Compute the spelling variants for the given normalized housenumber.
+
+ Generates variants for optional spaces (marked with '␣').
+ """
+ return list(self.mutator.generate([norm_name]))
status.set_status(conn, date=date, seq=seq)
- LOG.warning("Updates intialised at sequence %s (%s)", seq, date)
+ LOG.warning("Updates initialised at sequence %s (%s)", seq, date)
def check_for_updates(conn, base_url):
# HTTPS_PROXY="http://user:pass@10.10.1.10:1080"
# Location of the osm2pgsql binary.
-# When empty, osm2pgsql is expected to reside in the osm2pgsql directory in
-# the project directory.
# EXPERT ONLY. You should usually use the supplied osm2pgsql.
NOMINATIM_OSM2PGSQL_BINARY=
mode: append
token-analysis:
- analyzer: generic
+ - id: "@housenumber"
+ analyzer: housenumbers
- id: bg
analyzer: generic
mode: variant-only
| | | | | 4 |
- Scenario: A simple numeral housenumber is found
+ Scenario: A simple ascii digit housenumber is found
Given the places
- | osm | class | type | housenr | geometry |
- | N1 | building | yes | 45 | 9 |
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | 45 | 9 |
And the places
| osm | class | type | name | geometry |
| W10 | highway | path | North Road | 1,2,3 |
| N1 |
+ @fail-legacy
+ Scenario Outline: Numeral housenumbers in any script are found
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <number> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | North Road | 1,2,3 |
+ When importing
+ And sending search query "45, North Road"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "North Road ④⑤"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "North Road 𑁪𑁫"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | number |
+ | 45 |
+ | ④⑤ |
+ | 𑁪𑁫 |
+
+
Scenario Outline: Each housenumber in a list is found
Given the places
| osm | class | type | housenr | geometry |
| 2, 4, 12 |
+ @fail-legacy
+ Scenario Outline: Housenumber - letter combinations are found
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Multistr | 1,2,3 |
+ When importing
+ When sending search query "2A Multistr"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "2 a Multistr"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "2-A Multistr"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Multistr 2 A"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 2a |
+ | 2 A |
+ | 2-a |
+ | 2/A |
+
+
+ Scenario Outline: Number - Number combinations as a housenumber are found
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Chester St | 1,2,3 |
+ When importing
+ When sending search query "34-10 Chester St"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "34/10 Chester St"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "34 10 Chester St"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "3410 Chester St"
+ Then results contain
+ | osm |
+ | W10 |
+
+ Examples:
+ | hnr |
+ | 34-10 |
+ | 34 10 |
+ | 34/10 |
+
+
+ @fail-legacy
+ Scenario Outline: a bis housenumber is found
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Rue Paris | 1,2,3 |
+ When importing
+ When sending search query "Rue Paris 45bis"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue Paris 45 BIS"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue Paris 45BIS"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue Paris 45 bis"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 45bis |
+ | 45BIS |
+ | 45 BIS |
+ | 45 bis |
+
+
+ @fail-legacy
+ Scenario Outline: a ter housenumber is found
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Rue du Berger | 1,2,3 |
+ When importing
+ When sending search query "Rue du Berger 45ter"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue du Berger 45 TER"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue du Berger 45TER"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue du Berger 45 ter"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 45ter |
+ | 45TER |
+ | 45 ter |
+ | 45 TER |
+
+
+ @fail-legacy
+ Scenario Outline: a number - letter - number combination housenumber is found
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Herengracht | 1,2,3 |
+ When importing
+ When sending search query "501-H 1 Herengracht"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "501H-1 Herengracht"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "501H1 Herengracht"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "501-H1 Herengracht"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 501 H1 |
+ | 501H 1 |
+ | 501/H/1 |
+ | 501h1 |
+
+
+ @fail-legacy
+ Scenario Outline: Russian housenumbers are found
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Голубинская улица | 1,2,3 |
+ When importing
+ When sending search query "Голубинская улица 55к3"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Голубинская улица 55 k3"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Голубинская улица 55 к-3"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 55к3 |
+ | 55 к3 |
+
+
Scenario: A name mapped as a housenumber is found
Given the places
| osm | class | type | housenr | geometry |
--- /dev/null
+@DB
+Feature: Query of address interpolations
+ Tests that interpolated addresses can be queried correctly
+
+ Background:
+ Given the grid
+ | 1 | | 2 | | 3 |
+ | 10 | | 12 | | 13 |
+ | 7 | | 8 | | 9 |
+
+ Scenario: Find interpolations with single number
+ Given the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | primary | Nickway | 10,12,13 |
+ And the places
+ | osm | class | type | addr+interpolation | geometry |
+ | W1 | place | houses | odd | 1,3 |
+ And the places
+ | osm | class | type | housenr | geometry |
+ | N1 | place | house | 1 | 1 |
+ | N3 | place | house | 5 | 3 |
+ And the ways
+ | id | nodes |
+ | 1 | 1,3 |
+ When importing
+ When sending jsonv2 reverse point 2
+ Then results contain
+ | ID | display_name |
+ | 0 | 3, Nickway |
+ When sending search query "Nickway 3"
+ Then results contain
+ | osm | display_name |
+ | W1 | 3, Nickway |
+
+
+ Scenario: Find interpolations with multiple numbers
+ Given the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | primary | Nickway | 10,12,13 |
+ And the places
+ | osm | class | type | addr+interpolation | geometry |
+ | W1 | place | houses | even | 1,3 |
+ And the places
+ | osm | class | type | housenr | geometry |
+ | N1 | place | house | 2 | 1 |
+ | N3 | place | house | 16 | 3 |
+ And the ways
+ | id | nodes |
+ | 1 | 1,3 |
+ When importing
+ When sending jsonv2 reverse point 2
+ Then results contain
+ | ID | display_name | centroid |
+ | 0 | 10, Nickway | 2 |
+ When sending search query "Nickway 10"
+ Then results contain
+ | osm | display_name | centroid |
+ | W1 | 10, Nickway | 2 |
if errorcode == 200 and fmt != 'debug':
getattr(self, '_parse_' + fmt)()
+ else:
+ print("Bad response: ", page)
def _parse_json(self):
m = re.fullmatch(r'([\w$][^(]*)\((.*)\)', self.page)
"\nBad value for row {} field '{}' in address. Expected: {}, got: {}.\nFull address: {}"""\
.format(idx, field, value, address[field], json.dumps(address, indent=4))
- def match_row(self, row):
+ def match_row(self, row, context=None):
""" Match the result fields against the given behave table row.
"""
if 'ID' in row.headings:
assert self.result[i]['osm_type'] in (OSM_TYPE[value[0]], value[0]), \
BadRowValueAssert(self, i, 'osm_type', value)
elif name == 'centroid':
- lon, lat = value.split(' ')
+ if ' ' in value:
+ lon, lat = value.split(' ')
+ elif context is not None:
+ lon, lat = context.osm.grid_node(int(value))
+ else:
+ raise RuntimeError("Context needed when using grid coordinates")
self.assert_field(i, 'lat', float(lat))
self.assert_field(i, 'lon', float(lon))
else:
context.response = ReverseResponse(outp, fmt or 'xml', status)
+@when(u'sending (?P<fmt>\S+ )?reverse point (?P<nodeid>.+)')
+def website_reverse_request(context, fmt, nodeid):
+ params = {}
+ if fmt and fmt.strip() == 'debug':
+ params['debug'] = '1'
+ params['lon'], params['lat'] = (f'{c:f}' for c in context.osm.grid_node(int(nodeid)))
+
+ outp, status = send_api_query('reverse', params, fmt, context)
+
+ context.response = ReverseResponse(outp, fmt or 'xml', status)
+
+
@when(u'sending (?P<fmt>\S+ )?details query for (?P<query>.*)')
def website_details_request(context, fmt, query):
params = {}
context.execute_steps("then at least 1 result is returned")
for line in context.table:
- context.response.match_row(line)
+ context.response.match_row(line, context=context)
@then(u'result (?P<lid>\d+ )?has (?P<neg>not )?attributes (?P<attrs>.*)')
def validate_attributes(context, lid, neg, attrs):
self.conn.commit()
- def add_housenumber(self, word_id, word_token):
+ def add_housenumber(self, word_id, word_tokens, word=None):
with self.conn.cursor() as cur:
- cur.execute("""INSERT INTO word (word_id, word_token, type)
- VALUES (%s, %s, 'H')
- """, (word_id, word_token))
+ if isinstance(word_tokens, str):
+ # old style without analyser
+ cur.execute("""INSERT INTO word (word_id, word_token, type)
+ VALUES (%s, %s, 'H')
+ """, (word_id, word_tokens))
+ else:
+ if word is None:
+ word = word_tokens[0]
+ for token in word_tokens:
+ cur.execute("""INSERT INTO word (word_id, word_token, type, word, info)
+ VALUES (%s, %s, 'H', %s, jsonb_build_object('lookup', %s))
+ """, (word_id, token, word, word_tokens[0]))
+
self.conn.commit()
def _mk_analyser(norm=("[[:Punctuation:][:Space:]]+ > ' '",), trans=(':: upper()',),
variants=('~gasse -> gasse', 'street => st', ),
- sanitizers=[]):
+ sanitizers=[], with_housenumber=False):
cfgstr = {'normalization': list(norm),
'sanitizers': sanitizers,
'transliteration': list(trans),
'token-analysis': [{'analyzer': 'generic',
'variants': [{'words': list(variants)}]}]}
+ if with_housenumber:
+ cfgstr['token-analysis'].append({'id': '@housenumber',
+ 'analyzer': 'housenumbers'})
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr))
tok.loader = nominatim.tokenizer.icu_rule_loader.ICURuleLoader(test_config)
assert 'addr' not in info
+class TestPlaceHousenumberWithAnalyser:
+
+ @pytest.fixture(autouse=True)
+ def setup(self, analyzer, sql_functions):
+ hnr = {'step': 'clean-housenumbers',
+ 'filter-kind': ['housenumber', 'conscriptionnumber', 'streetnumber']}
+ with analyzer(trans=(":: upper()", "'🜵' > ' '"), sanitizers=[hnr], with_housenumber=True) as anl:
+ self.analyzer = anl
+ yield anl
+
+
+ @pytest.fixture
+ def getorcreate_hnr_id(self, temp_db_cursor):
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_analyzed_hnr_id(norm_term TEXT, lookup_terms TEXT[])
+ RETURNS INTEGER AS $$
+ SELECT -nextval('seq_word')::INTEGER; $$ LANGUAGE SQL""")
+
+
+ def process_address(self, **kwargs):
+ return self.analyzer.process_place(PlaceInfo({'address': kwargs}))
+
+
+ def name_token_set(self, *expected_terms):
+ tokens = self.analyzer.get_word_token_info(expected_terms)
+ for token in tokens:
+ assert token[2] is not None, "No token for {0}".format(token)
+
+ return set((t[2] for t in tokens))
+
+
+ @pytest.mark.parametrize('hnr', ['123 a', '1', '101'])
+ def test_process_place_housenumbers_simple(self, hnr, getorcreate_hnr_id):
+ info = self.process_address(housenumber=hnr)
+
+ assert info['hnr'] == hnr.upper()
+ assert info['hnr_tokens'] == "{-1}"
+
+
+ def test_process_place_housenumbers_duplicates(self, getorcreate_hnr_id):
+ info = self.process_address(housenumber='134',
+ conscriptionnumber='134',
+ streetnumber='99a')
+
+ assert set(info['hnr'].split(';')) == set(('134', '99 A'))
+ assert info['hnr_tokens'] == "{-1,-2}"
+
+
+ def test_process_place_housenumbers_cached(self, getorcreate_hnr_id):
+ info = self.process_address(housenumber="45")
+ assert info['hnr_tokens'] == "{-1}"
+
+ info = self.process_address(housenumber="46")
+ assert info['hnr_tokens'] == "{-2}"
+
+ info = self.process_address(housenumber="41;45")
+ assert eval(info['hnr_tokens']) == {-1, -3}
+
+ info = self.process_address(housenumber="41")
+ assert eval(info['hnr_tokens']) == {-3}
+
+
class TestUpdateWordTokens:
@pytest.fixture(autouse=True)
return _insert
+ @pytest.fixture(params=['simple', 'analyzed'])
+ def add_housenumber(self, request, word_table):
+ if request.param == 'simple':
+ def _make(hid, hnr):
+ word_table.add_housenumber(hid, hnr)
+ elif request.param == 'analyzed':
+ def _make(hid, hnr):
+ word_table.add_housenumber(hid, [hnr])
+
+ return _make
+
+
@pytest.mark.parametrize('hnr', ('1a', '1234567', '34 5'))
- def test_remove_unused_housenumbers(self, word_table, hnr):
+ def test_remove_unused_housenumbers(self, add_housenumber, word_table, hnr):
-        word_table.add_housenumber(1000, hnr)
+        add_housenumber(1000, hnr)
assert word_table.count_housenumbers() == 1
assert word_table.count_housenumbers() == 0
- def test_keep_unused_numeral_housenumbers(self, word_table):
- word_table.add_housenumber(1000, '5432')
+ def test_keep_unused_numeral_housenumbers(self, add_housenumber, word_table):
+ add_housenumber(1000, '5432')
assert word_table.count_housenumbers() == 1
self.tok.update_word_tokens()
assert word_table.count_housenumbers() == 1
- def test_keep_housenumbers_from_search_name_table(self, word_table, search_entry):
- word_table.add_housenumber(9999, '5432a')
- word_table.add_housenumber(9991, '9 a')
+ def test_keep_housenumbers_from_search_name_table(self, add_housenumber, word_table, search_entry):
+ add_housenumber(9999, '5432a')
+ add_housenumber(9991, '9 a')
search_entry(123, 9999, 34)
assert word_table.count_housenumbers() == 2
assert word_table.count_housenumbers() == 1
- def test_keep_housenumbers_from_placex_table(self, word_table, placex_table):
- word_table.add_housenumber(9999, '5432a')
- word_table.add_housenumber(9990, '34z')
+ def test_keep_housenumbers_from_placex_table(self, add_housenumber, word_table, placex_table):
+ add_housenumber(9999, '5432a')
+ add_housenumber(9990, '34z')
placex_table.add(housenumber='34z')
placex_table.add(housenumber='25432a')
assert word_table.count_housenumbers() == 1
- def test_keep_housenumbers_from_placex_table_hnr_list(self, word_table, placex_table):
- word_table.add_housenumber(9991, '9 b')
- word_table.add_housenumber(9990, '34z')
+ def test_keep_housenumbers_from_placex_table_hnr_list(self, add_housenumber, word_table, placex_table):
+ add_housenumber(9991, '9 b')
+ add_housenumber(9990, '34z')
placex_table.add(housenumber='9 a;9 b;9 c')
assert word_table.count_housenumbers() == 2
rules['mode'] = 'variant-only'
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+ norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
- return module.create(trans, config)
+ return module.create(norm, trans, config)
def get_normalized_variants(proc, name):
rules = { 'analyzer': 'generic' }
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+ norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
- proc = module.create(trans, config)
+ proc = module.create(norm, trans, config)
assert get_normalized_variants(proc, '大德!') == ['dà dé']
}
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+ norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
- self.analysis = module.create(trans, config)
+ self.analysis = module.create(norm, trans, config)
def variants(self, name):
#
if [ "x$1" == "xyes" ]; then #DOCS: :::sh
cd $USERHOME
- git clone --recursive git://github.com/openstreetmap/Nominatim.git
+ git clone --recursive https://github.com/openstreetmap/Nominatim.git
cd Nominatim
else #DOCS:
cd $USERHOME/Nominatim #DOCS:
#
if [ "x$1" == "xyes" ]; then #DOCS: :::sh
cd $USERHOME
- git clone --recursive git://github.com/openstreetmap/Nominatim.git
+ git clone --recursive https://github.com/openstreetmap/Nominatim.git
cd Nominatim
else #DOCS:
cd $USERHOME/Nominatim #DOCS:
#
if [ "x$1" == "xyes" ]; then #DOCS: :::sh
cd $USERHOME
- git clone --recursive git://github.com/openstreetmap/Nominatim.git
+ git clone --recursive https://github.com/openstreetmap/Nominatim.git
cd Nominatim
else #DOCS:
cd $USERHOME/Nominatim #DOCS: