Introduce a PlaceInfo class that wraps the place information the indexer reads from the database and hands to the tokenizer. This is mostly for convenience and documentation purposes.
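
The gist of the new calling convention, as a minimal sketch (DummyAnalyzer and place_row are made up for illustration; only PlaceInfo itself comes from this change, and the import assumes the patch is applied):

    from nominatim.indexer.place_info import PlaceInfo

    class DummyAnalyzer:
        # stand-in for a real tokenizer analyzer
        def process_place(self, place):
            assert isinstance(place, PlaceInfo)
            return {'names': place.name or {}}

    place_row = {'place_id': 1, 'name': {'name': 'Soft bAr'}, 'address': None}

    # before: psycopg2.extras.Json(analyzer.process_place(place_row))
    # after:  the wrapper takes care of the Json conversion itself
    value = PlaceInfo(place_row).analyze(DummyAnalyzer())

The runners keep passing the same dict they read from the database; PlaceInfo merely wraps it.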
--- /dev/null
+"""
+Wrapper around place information the indexer gets from the database and hands to
+the tokenizer.
+"""
+
+import psycopg2.extras
+
+class PlaceInfo:
+ """ Data class containing all information the tokenizer gets about a
+ place it should process the names for.
+ """
+
+ def __init__(self, info):
+ self._info = info
+
+
+ def analyze(self, analyzer):
+ """ Process this place with the given tokenizer and return the
+ result in psycopg2-compatible Json.
+ """
+ return psycopg2.extras.Json(analyzer.process_place(self))
+
+
+ @property
+ def name(self):
+ """ A dictionary with the names of the place or None if the place
+ has no names.
+ """
+ return self._info.get('name')
+
+
+ @property
+ def address(self):
+ """ A dictionary with the address elements of the place
+ or None if no address information is available.
+ """
+ return self._info.get('address')
+
+
+ @property
+ def country_feature(self):
+ """ Return the country code if the place is a valid country boundary.
+ """
+ return self._info.get('country_feature')
"""
import functools
-import psycopg2.extras
from psycopg2 import sql as pysql
+from nominatim.indexer.place_info import PlaceInfo
+
# pylint: disable=C0111
def _mk_valuelist(template, num):
return pysql.SQL(',').join([pysql.SQL(template)] * num)
+
class AbstractPlacexRunner:
""" Returns SQL commands for indexing of the placex table.
"""
for place in places:
for field in ('place_id', 'name', 'address', 'linked_place_id'):
values.append(place[field])
- values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+ values.append(PlaceInfo(place).analyze(self.analyzer))
worker.perform(self._index_sql(len(places)), values)
values = []
for place in places:
values.extend((place[x] for x in ('place_id', 'address')))
- values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+ values.append(PlaceInfo(place).analyze(self.analyzer))
worker.perform(self._index_sql(len(places)), values)
from typing import List, Tuple, Dict, Any
from nominatim.config import Configuration
+from nominatim.indexer.place_info import PlaceInfo
# pylint: disable=unnecessary-pass
@abstractmethod
- def process_place(self, place: Dict) -> Any:
+ def process_place(self, place: PlaceInfo) -> Any:
""" Extract tokens for the given place and compute the
information to be handed to the PL/pgSQL processor for building
the search index.
Arguments:
- place: Dictionary with the information about the place. Currently
- the following fields may be present:
-
- - *name* is a dictionary of names for the place together
- with the designation of the name.
- - *address* is a dictionary of address terms.
- - *country_feature* is set to a country code when the
- place describes a country.
+ place: Place information retrieved from the database.
Returns:
A JSON-serialisable structure that will be handed into
"""
token_info = _TokenInfo(self._cache)
- names = place.get('name')
+ names = place.name
if names:
fulls, partials = self._compute_name_tokens(names)
token_info.add_names(fulls, partials)
- country_feature = place.get('country_feature')
+ country_feature = place.country_feature
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
self.add_country_names(country_feature.lower(), names)
- address = place.get('address')
+ address = place.address
if address:
self._process_place_address(token_info, address)
"""
token_info = _TokenInfo(self._cache)
- names = place.get('name')
+ names = place.name
if names:
token_info.add_names(self.conn, names)
- country_feature = place.get('country_feature')
+ country_feature = place.country_feature
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
self.add_country_names(country_feature.lower(), names)
- address = place.get('address')
+ address = place.address
if address:
self._process_place_address(token_info, address)
import os
import tarfile
-import psycopg2.extras
-
from nominatim.db.connection import connect
from nominatim.db.async_connection import WorkerPool
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.errors import UsageError
+from nominatim.indexer.place_info import PlaceInfo
LOG = logging.getLogger()
address = dict(street=row['street'], postcode=row['postcode'])
args = ('SRID=4326;' + row['geometry'],
int(row['from']), int(row['to']), row['interpolation'],
- psycopg2.extras.Json(analyzer.process_place(dict(address=address))),
+ PlaceInfo({'address': address}).analyze(analyzer),
analyzer.normalize_postcode(row['postcode']))
except ValueError:
continue
"""
Tokenizer for testing.
"""
+from nominatim.indexer.place_info import PlaceInfo
def create(dsn, data_dir):
""" Create a new instance of the tokenizer provided by this module.
@staticmethod
def process_place(place):
+ assert isinstance(place, PlaceInfo)
return {}
from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
from nominatim.db import properties
from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.indexer.place_info import PlaceInfo
from mock_icu_word_table import MockIcuWordTable
assert eval(info['names']) == set((t[2] for t in tokens))
+ def process_named_place(self, names, country_feature=None):
+ place = {'name': names}
+ if country_feature:
+ place['country_feature'] = country_feature
+
+ return self.analyzer.process_place(PlaceInfo(place))
+
+
def test_simple_names(self):
- info = self.analyzer.process_place({'name': {'name': 'Soft bAr', 'ref': '34'}})
+ info = self.process_named_place({'name': 'Soft bAr', 'ref': '34'})
self.expect_name_terms(info, '#Soft bAr', '#34', 'Soft', 'bAr', '34')
@pytest.mark.parametrize('sep', [',' , ';'])
def test_names_with_separator(self, sep):
- info = self.analyzer.process_place({'name': {'name': sep.join(('New York', 'Big Apple'))}})
+ info = self.process_named_place({'name': sep.join(('New York', 'Big Apple'))})
self.expect_name_terms(info, '#New York', '#Big Apple',
'new', 'york', 'big', 'apple')
def test_full_names_with_bracket(self):
- info = self.analyzer.process_place({'name': {'name': 'Houseboat (left)'}})
+ info = self.process_named_place({'name': 'Houseboat (left)'})
self.expect_name_terms(info, '#Houseboat (left)', '#Houseboat',
'houseboat', 'left')
def test_country_name(self, word_table):
- info = self.analyzer.process_place({'name': {'name': 'Norge'},
- 'country_feature': 'no'})
+ info = self.process_named_place({'name': 'Norge'}, country_feature='no')
self.expect_name_terms(info, '#norge', 'norge')
assert word_table.get_country() == {('no', 'NORGE')}
def process_address(self, **kwargs):
- return self.analyzer.process_place({'address': kwargs})
+ return self.analyzer.process_place(PlaceInfo({'address': kwargs}))
def name_token_set(self, *expected_terms):
import pytest
+from nominatim.indexer.place_info import PlaceInfo
from nominatim.tokenizer import legacy_tokenizer
from nominatim.db import properties
from nominatim.errors import UsageError
def test_process_place_names(analyzer, make_keywords):
- info = analyzer.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}})
+ info = analyzer.process_place(PlaceInfo({'name' : {'name' : 'Soft bAr', 'ref': '34'}}))
assert info['names'] == '{1,2,3}'
@pytest.mark.parametrize('pcode', ['12345', 'AB 123', '34-345'])
def test_process_place_postcode(analyzer, create_postcode_id, word_table, pcode):
- analyzer.process_place({'address': {'postcode' : pcode}})
+ analyzer.process_place(PlaceInfo({'address': {'postcode' : pcode}}))
assert word_table.get_postcodes() == {pcode, }
@pytest.mark.parametrize('pcode', ['12:23', 'ab;cd;f', '123;836'])
def test_process_place_bad_postcode(analyzer, create_postcode_id, word_table, pcode):
- analyzer.process_place({'address': {'postcode' : pcode}})
+ analyzer.process_place(PlaceInfo({'address': {'postcode' : pcode}}))
assert not word_table.get_postcodes()
@staticmethod
@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
def test_process_place_housenumbers_simple(analyzer, hnr):
- info = analyzer.process_place({'address': {'housenumber' : hnr}})
+ info = analyzer.process_place(PlaceInfo({'address': {'housenumber' : hnr}}))
assert info['hnr'] == hnr
assert info['hnr_tokens'].startswith("{")
@staticmethod
def test_process_place_housenumbers_lists(analyzer):
- info = analyzer.process_place({'address': {'conscriptionnumber' : '1; 2;3'}})
+ info = analyzer.process_place(PlaceInfo({'address': {'conscriptionnumber' : '1; 2;3'}}))
assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
@staticmethod
def test_process_place_housenumbers_duplicates(analyzer):
- info = analyzer.process_place({'address': {'housenumber' : '134',
+ info = analyzer.process_place(PlaceInfo({'address': {'housenumber' : '134',
'conscriptionnumber' : '134',
- 'streetnumber' : '99a'}})
+ 'streetnumber' : '99a'}}))
assert set(info['hnr'].split(';')) == set(('134', '99a'))