shell: bash
- name: Install ${{ matrix.flavour }} prerequisites
run: |
- sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson
+ sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg
else
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
* [ICU](http://site.icu-project.org/)
+ * [nlohmann/json](https://json.nlohmann.me/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
$sPlaceId = $oParams->getString('place_id');
$sOsmType = $oParams->getSet('osmtype', array('N', 'W', 'R'));
-$iOsmId = $oParams->getInt('osmid', -1);
+$iOsmId = $oParams->getInt('osmid', 0);
$sClass = $oParams->getString('class');
$bIncludeKeywords = $oParams->getBool('keywords', false);
$sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
-if ($sOsmType && $iOsmId > 0) {
+if ($sOsmType && $iOsmId !== 0) {
$sSQL = 'SELECT place_id FROM placex WHERE osm_type = :type AND osm_id = :id';
$aSQLParams = array(':type' => $sOsmType, ':id' => $iOsmId);
// osm_type and osm_id are not unique enough
END IF;
RETURN ST_Envelope(ST_Collect(
- ST_Project(geom, radius, 0.785398)::geometry,
- ST_Project(geom, radius, 3.9269908)::geometry));
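+ -- ST_Project() expects a geography argument, hence the explicit casts.
+ -- The azimuths are pi/4 and 5*pi/4, i.e. the NE and SW corner points
+ -- that ST_Envelope() turns into a bounding box.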
+ ST_Project(geom::geography, radius, 0.785398)::geometry,
+ ST_Project(geom::geography, radius, 3.9269908)::geometry));
END;
$$
LANGUAGE plpgsql IMMUTABLE;
"""
Extended SQLAlchemy connection class that also includes access to the schema.
"""
-from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set
+from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
+ Awaitable, Callable, TypeVar
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.logging import log
+T = TypeVar('T')
+
class SearchConnection:
""" An extended SQLAlchemy connection class, that also contains
then table definitions. The underlying asynchronous SQLAlchemy
Raises a ValueError if the property does not exist.
"""
- if name.startswith('DB:'):
- raise ValueError(f"Illegal property value '{name}'.")
+ lookup_name = f'DBPROP:{name}'
- if cached and name in self._property_cache:
- return cast(str, self._property_cache[name])
+ if cached and lookup_name in self._property_cache:
+ return cast(str, self._property_cache[lookup_name])
sql = sa.select(self.t.properties.c.value)\
.where(self.t.properties.c.property == name)
if value is None:
raise ValueError(f"Property '{name}' not found in database.")
- self._property_cache[name] = cast(str, value)
+ self._property_cache[lookup_name] = cast(str, value)
return cast(str, value)
return self._property_cache['DB:server_version']
+ async def get_cached_value(self, group: str, name: str,
+ factory: Callable[[], Awaitable[T]]) -> T:
+ """ Access the cache for this Nominatim instance.
+ Each cache value needs to belong to a group and have a name.
+ This function is for internal API use only.
+
+ `factory` is an async callback function that produces
+ the value if it is not already cached.
+
+ Returns the cached value or the result of factory (also caching
+ the result).
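+
+ A minimal usage sketch (the group, name, and factory below are
+ illustrative only, not existing identifiers):
+
+ async def _make_value() -> str:
+ return 'expensive result'
+
+ value = await conn.get_cached_value('EXAMPLE', 'value', _make_value)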
+ """
+ full_name = f'{group}:{name}'
+
+ if full_name in self._property_cache:
+ return cast(T, self._property_cache[full_name])
+
+ value = await factory()
+ self._property_cache[full_name] = value
+
+ return value
+
+
async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
""" Lookup up if there is a classtype table for the given category
and return a SQLAlchemy table for it, if it exists.
out = []
if details.geometry_simplification > 0.0:
- col = col.ST_SimplifyPreserveTopology(details.geometry_simplification)
+ col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
- out.append(col.ST_AsGeoJSON().label('geometry_geojson'))
+ out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
if details.geometry_output & ntyp.GeometryFormat.TEXT:
- out.append(col.ST_AsText().label('geometry_text'))
+ out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & ntyp.GeometryFormat.KML:
- out.append(col.ST_AsKML().label('geometry_kml'))
+ out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
if details.geometry_output & ntyp.GeometryFormat.SVG:
- out.append(col.ST_AsSVG().label('geometry_svg'))
+ out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
return sql.add_columns(*out)
def _is_address_point(table: SaFromClause) -> SaColumn:
return sa.and_(table.c.rank_address == 30,
sa.or_(table.c.housenumber != None,
- table.c.name.has_key('housename')))
+ table.c.name.has_key('addr:housename')))
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
penalty = min(categories.penalties)
categories.penalties = [p - penalty for p in categories.penalties]
for search in builder:
- yield dbs.NearSearch(penalty, categories, search)
+ yield dbs.NearSearch(penalty + assignment.penalty, categories, search)
else:
- yield from builder
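+                # Carry the assignment penalty over to each generated
+                # search instead of silently dropping it.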
+ for search in builder:
+ search.penalty += assignment.penalty
+ yield search
def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
and all(t.is_indexed for t in addr_partials)
exp_count = min(t.count for t in name_partials)
- if (len(name_partials) > 3 or exp_count < 1000) and partials_indexed:
+ if (len(name_partials) > 3 or exp_count < 3000) and partials_indexed:
yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
return
- exp_count = min(exp_count, min(t.count for t in addr_partials)) \
- if addr_partials else exp_count
+ exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
# Partial term too frequent. Try looking up by rare full names first.
name_fulls = self.query.get_tokens(name, TokenType.WORD)
orexpr.append(table.c.rank_address.between(1, 29))
orexpr.append(sa.and_(table.c.rank_address == 30,
sa.or_(table.c.housenumber != None,
- table.c.address.has_key('housename'))))
+ table.c.address.has_key('addr:housename'))))
elif layers & DataLayer.POI:
orexpr.append(sa.and_(table.c.rank_address == 30,
table.c.class_.not_in(('place', 'building'))))
"""
def __init__(self, sdata: SearchData) -> None:
super().__init__(sdata.penalty)
- self.categories = sdata.qualifiers
+ self.qualifiers = sdata.qualifiers
self.countries = sdata.countries
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
.limit(LIMIT_PARAM)
- classtype = self.categories.values
+ classtype = self.qualifiers.values
if len(classtype) == 1:
cclass, ctype = classtype[0]
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
rows.extend(await conn.execute(sql, bind_params))
else:
# use the class type tables
- for category in self.categories.values:
+ for category in self.qualifiers.values:
table = await conn.get_class_table(*category)
if table is not None:
sql = _select_placex(t)\
for row in rows:
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
- result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
+ result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
+ result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
return results or await self.lookup_in_country_table(conn, details)
if details.viewbox is not None:
if details.bounded_viewbox:
- sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
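+                    # Only small viewboxes profit from the spatial index;
+                    # for larger ones a sequential scan tends to be cheaper.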
+ if details.viewbox.area < 0.2:
+ sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+ else:
+ sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
if details.near is not None:
if details.near_radius is not None:
- sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
- sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
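+                # Same reasoning as for the viewbox: only use the spatial
+                # index when the search radius is small.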
+ if details.near_radius < 0.1:
+ sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+ else:
+ sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
+ NEAR_RADIUS_PARAM))
+ sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance')))
else:
.where(thnr.c.indexed_status == 0)
if details.excluded:
- place_sql = place_sql.where(_exclude_places(thnr))
+ place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
if self.qualifiers:
place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
# pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
start: int = 0) -> Iterator[Optional[List[Any]]]:
- yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']
+ yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
+ 'Qualifier', 'Category', 'Rankings']
def tk(tl: List[int]) -> str:
tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
for search in searches[start:]:
fields = ('lookups', 'rankings', 'countries', 'housenumbers',
- 'postcodes', 'qualifier')
- iters = itertools.zip_longest([f"{search.penalty:.3g}"],
- *(getattr(search, attr, []) for attr in fields),
- fillvalue= '')
- for penalty, lookup, rank, cc, hnr, pc, qual in iters:
+ 'postcodes', 'qualifiers')
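+        # A NearSearch wraps an inner search, so read the fields off
+        # the wrapped object.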
+ if hasattr(search, 'search'):
+ iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+ *(getattr(search.search, attr, []) for attr in fields),
+ getattr(search, 'categories', []),
+ fillvalue='')
+ else:
+ iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+ *(getattr(search, attr, []) for attr in fields),
+ [],
+ fillvalue='')
+ for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
- fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_ranking(rank)]
+ fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
yield None
seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
distance = 0
for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
- if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)):
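+            # An affix difference (an insert or delete at either end of
+            # the lookup word) costs a flat 1.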
+ if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
distance += 1
elif tag == 'replace':
distance += max((ato-afrom), (bto-bfrom))
async def setup(self) -> None:
""" Set up static data structures needed for the analysis.
"""
- rules = await self.conn.get_property('tokenizer_import_normalisation')
- self.normalizer = Transliterator.createFromRules("normalization", rules)
- rules = await self.conn.get_property('tokenizer_import_transliteration')
- self.transliterator = Transliterator.createFromRules("transliteration", rules)
+ async def _make_normalizer() -> Any:
+ rules = await self.conn.get_property('tokenizer_import_normalisation')
+ return Transliterator.createFromRules("normalization", rules)
+
+ self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
+ _make_normalizer)
+
+ async def _make_transliterator() -> Any:
+ rules = await self.conn.get_property('tokenizer_import_transliteration')
+ return Transliterator.createFromRules("transliteration", rules)
+
+ self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
+ _make_transliterator)
if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta,
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1):
return False
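+        # A category term combined with a housenumber is rarely what the
+        # user intended, so penalise the combination heavily.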
+ if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq):
+ self.penalty += 1.0
return True
if result.address_rows is not None:
_add_address_rows(out, 'address', result.address_rows, locales)
- if result.linked_rows is not None:
+ if result.linked_rows:
_add_address_rows(out, 'linked_places', result.linked_rows, locales)
if result.name_keywords is not None or result.address_keywords is not None:
result = await api.details(place,
address_details=params.get_bool('addressdetails', False),
- linked_places=params.get_bool('linkedplaces', False),
+ linked_places=params.get_bool('linkedplaces', True),
parented_places=params.get_bool('hierarchy', False),
keywords=params.get_bool('keywords', False),
geometry_output = napi.GeometryFormat.GEOJSON
def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
- return sa.func.ST_DWithin(self, other, distance, type_=sa.Float)
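+        # ST_DWithin() returns a boolean, not a float.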
+ return sa.func.ST_DWithin(self, other, distance, type_=sa.Boolean)
+
+
+ def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
+ # Wrapping the column in coalesce() makes it an opaque expression,
+ # so the query planner cannot use the spatial index for this check.
+ return sa.func.ST_DWithin(sa.func.coalesce(sa.null(), self),
+ other, distance, type_=sa.Boolean)
+
+
+ def ST_Intersects_no_index(self, other: SaColumn) -> 'sa.Operators':
+ # Same trick, using the PostGIS bounding-box overlap operator '&&'.
+ return sa.func.coalesce(sa.null(), self).op('&&')(other)
def ST_Distance(self, other: SaColumn) -> SaColumn:
-Subproject commit 4facd1aea451cea220261c361698b8e5f18a9327
+Subproject commit ea0178e97d5b69a87a8b9c35210c8be4674e60e6
search = searches[0]
assert isinstance(search, dbs.PoiSearch)
- assert search.categories.values == [('this', 'that')]
+ assert search.qualifiers.values == [('this', 'that')]
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
def test_too_frequent_partials_in_name_and_address():
- searches = make_counted_searches(10000, 1, 10000, 1)
+ searches = make_counted_searches(20000, 1, 10000, 1)
assert len(searches) == 1
assert len(result) == 2
assert set(r.place_id for r in result) == {332, 4924}
+
+
+@pytest.mark.parametrize('gtype', list(napi.GeometryFormat))
+def test_simple_place_with_geometry(apiobj, gtype):
+ apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+ class_='highway', type='residential',
+ name={'name': 'Road'}, address={'city': 'Barrow'},
+ extratags={'surface': 'paved'},
+ parent_place_id=34, linked_place_id=55,
+ admin_level=15, country_code='gb',
+ housenumber='4',
+ postcode='34425', wikipedia='en:Faa',
+ rank_search=27, rank_address=26,
+ importance=0.01,
+ centroid=(23, 34),
+ geometry='POLYGON((23 34, 23.1 34, 23.1 34.1, 23 34))')
+
+ result = apiobj.api.lookup([napi.OsmID('W', 4)],
+ geometry_output=gtype)
+
+ assert len(result) == 1
+ assert result[0].place_id == 332
+
+ if gtype == napi.GeometryFormat.NONE:
+ assert list(result[0].geometry.keys()) == []
+ else:
+ assert list(result[0].geometry.keys()) == [gtype.name.lower()]
+
+
+def test_simple_place_with_geometry_simplified(apiobj):
+ apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+ class_='highway', type='residential',
+ name={'name': 'Road'}, address={'city': 'Barrow'},
+ extratags={'surface': 'paved'},
+ parent_place_id=34, linked_place_id=55,
+ admin_level=15, country_code='gb',
+ housenumber='4',
+ postcode='34425', wikipedia='en:Faa',
+ rank_search=27, rank_address=26,
+ importance=0.01,
+ centroid=(23, 34),
+ geometry='POLYGON((23 34, 22.999 34, 23.1 34, 23.1 34.1, 23 34))')
+
+ result = apiobj.api.lookup([napi.OsmID('W', 4)],
+ geometry_output=napi.GeometryFormat.TEXT,
+ geometry_simplification=0.1)
+
+ assert len(result) == 1
+ assert result[0].place_id == 332
+ assert result[0].geometry == {'text': 'POLYGON((23 34,23.1 34,23.1 34.1,23 34))'}
+
(0.7, napi.DataLayer.RAILWAY, 226),
(0.7, napi.DataLayer.NATURAL, 227),
(0.70003, napi.DataLayer.MANMADE | napi.DataLayer.RAILWAY, 225),
- (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225)])
+ (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225),
+ (5, napi.DataLayer.ADDRESS, 229)])
def test_reverse_rank_30_layers(apiobj, y, layer, place_id):
apiobj.add_placex(place_id=223, class_='place', type='house',
housenumber='1',
rank_address=0,
rank_search=30,
centroid=(1.3, 0.70005))
+ apiobj.add_placex(place_id=229, class_='place', type='house',
+ name={'addr:housename': 'Old Cottage'},
+ rank_address=30,
+ rank_search=30,
+ centroid=(1.3, 5))
assert apiobj.api.reverse((1.3, y), layers=layer).place_id == place_id
sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
libboost-filesystem-dev libexpat1-dev zlib1g-dev \
libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \
- postgresql-12-postgis-3 \
+ nlohmann-json3-dev postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php-cli php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 python3-pip \
sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
libboost-filesystem-dev libexpat1-dev zlib1g-dev \
libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \
- postgresql-server-dev-14 postgresql-14-postgis-3 \
+ nlohmann-json3-dev postgresql-14-postgis-3 \
postgresql-contrib-14 postgresql-14-postgis-3-scripts \
php-cli php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 \
python3-icu python3-datrie python3-sqlalchemy \
- python3-asyncpg git
+ python3-asyncpg python3-yaml git
#
# System Configuration