--- /dev/null
+github: lonvia
+custom: "https://nominatim.org/funding/"
| Version | End of support for security updates |
| ------- | ----------------------------------- |
+| 4.1.x | 2024-08-05 |
| 4.0.x | 2023-11-02 |
| 3.7.x | 2023-04-05 |
| 3.6.x | 2022-12-12 |
-| 3.5.x | 2022-06-05 |
## Reporting a Vulnerability
* `ref` - content of `ref` tag if it exists
* `lat`, `lon` - latitude and longitude of the centroid of the object
* `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))
- * `place_rank` - class [search rank](../develop/Ranking#search-rank)
- * `address_rank` - place [address rank](../develop/Ranking#address-rank)
+ * `place_rank` - class [search rank](../customize/Ranking#search-rank)
+ * `address_rank` - place [address rank](../customize/Ranking#address-rank)
* `display_name` - full comma-separated address
* `class`, `type` - key and value of the main OSM tag
* `importance` - computed importance rank
IF parent_place_id is null THEN
FOR location IN SELECT place_id FROM placex
- WHERE ST_DWithin(geom, placex.geometry, 0.001) and placex.rank_search = 26
+ WHERE ST_DWithin(geom, placex.geometry, 0.001)
+ and placex.rank_search = 26
+ and placex.osm_type = 'W' -- needed for index selection
ORDER BY CASE WHEN ST_GeometryType(geom) = 'ST_Line' THEN
(ST_distance(placex.geometry, ST_LineInterpolatePoint(geom,0))+
ST_distance(placex.geometry, ST_LineInterpolatePoint(geom,0.5))+
SELECT place_id FROM placex
WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
AND rank_address between 5 and 25
+ AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
ORDER BY rank_address desc
LOOP
RETURN location.place_id;
SELECT place_id FROM placex
WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
AND rank_address between 5 and 25
+ AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
ORDER BY rank_address desc
LOOP
RETURN location.place_id;
-- If extratags has a place tag, look for linked nodes by their place type.
-- Area and node still have to have the same name.
- IF bnd.extratags ? 'place' and bnd_name is not null THEN
+ IF bnd.extratags ? 'place' and bnd.extratags->'place' != 'postcode'
+ and bnd_name is not null
+ THEN
FOR linked_placex IN
SELECT * FROM placex
WHERE (position(lower(name->'name') in bnd_name) > 0
AND placex.osm_type = 'N'
AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id)
AND placex.rank_search < 26 -- needed to select the right index
- AND placex.type != 'postcode'
AND ST_Covers(bnd.geometry, placex.geometry)
LOOP
{% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %}
FROM placex
WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
and admin_level < NEW.admin_level and admin_level > 3
- and rank_address > 0
+ and rank_address between 1 and 25 -- for index selection
+ and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') -- for index selection
and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid)
ORDER BY admin_level desc LIMIT 1
LOOP
FROM placex,
LATERAL compute_place_rank(country_code, 'A', class, type,
admin_level, False, null) prank
- WHERE class = 'place' and rank_address < 24
+ WHERE class = 'place' and rank_address between 1 and 23
and prank.address_rank >= NEW.rank_address
+ and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') -- select right index
and geometry && NEW.geometry
and geometry ~ NEW.geometry -- needed because ST_Relate does not do bbox cover test
and ST_Relate(geometry, NEW.geometry, 'T*T***FF*') -- contains but not equal
LATERAL compute_place_rank(country_code, 'A', class, type,
admin_level, False, null) prank
WHERE prank.address_rank < 24
+ and rank_address between 1 and 25 -- select right index
+ and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') -- select right index
and prank.address_rank >= NEW.rank_address
and geometry && NEW.geometry
and geometry ~ NEW.geometry -- needed because ST_Relate does not do bbox cover test
LATERAL compute_place_rank(country_code, 'A', class, type,
admin_level, False, null) prank
WHERE osm_type = 'R'
+ and rank_address between 1 and 25 -- select right index
+ and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') -- select right index
and ((class = 'place' and prank.address_rank = NEW.rank_address)
or (class = 'boundary' and rank_address = NEW.rank_address))
and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid)
CREATE INDEX IF NOT EXISTS idx_place_addressline_address_place_id
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
-
+---
CREATE INDEX IF NOT EXISTS idx_placex_rank_search
ON placex USING BTREE (rank_search) {{db.tablespace.search_index}};
-
+---
CREATE INDEX IF NOT EXISTS idx_placex_rank_address
ON placex USING BTREE (rank_address) {{db.tablespace.search_index}};
-
+---
CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}}
WHERE parent_place_id IS NOT NULL;
-
+---
+CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
+ USING GIST (geometry) {{db.tablespace.search_index}};
+---
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
ON placex USING gist (geometry) {{db.tablespace.search_index}}
WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
AND rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-
+---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id
ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}
WHERE parent_place_id is not null;
-
+---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id
ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}};
-
+---
CREATE INDEX IF NOT EXISTS idx_postcode_postcode
ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}};
+{% if drop %}
+---
+ DROP INDEX IF EXISTS idx_placex_geometry_address_area_candidates;
+ DROP INDEX IF EXISTS idx_placex_geometry_buildings;
+ DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
+ DROP INDEX IF EXISTS idx_placex_wikidata;
+ DROP INDEX IF EXISTS idx_placex_rank_address_sector;
+ DROP INDEX IF EXISTS idx_placex_rank_boundaries_sector;
+{% else %}
-- Indices only needed for updating.
-
-{% if not drop %}
- CREATE INDEX IF NOT EXISTS idx_placex_pendingsector
- ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}}
- WHERE indexed_status > 0;
-
+---
CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id
ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}};
-
+---
CREATE UNIQUE INDEX IF NOT EXISTS idx_place_osm_unique
ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}};
{% endif %}
-- Indices only needed for search.
-
{% if 'search_name' in db.tables %}
+---
CREATE INDEX IF NOT EXISTS idx_search_name_nameaddress_vector
ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
+---
CREATE INDEX IF NOT EXISTS idx_search_name_name_vector
ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
+---
CREATE INDEX IF NOT EXISTS idx_search_name_centroid
ON search_name USING GIST (centroid) {{db.tablespace.search_index}};
{% if postgres.has_index_non_key_column %}
+---
CREATE INDEX IF NOT EXISTS idx_placex_housenumber
ON placex USING btree (parent_place_id)
INCLUDE (housenumber) {{db.tablespace.search_index}}
WHERE housenumber is not null;
+---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
ON location_property_osmline USING btree(parent_place_id)
INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
) {{db.tablespace.search_data}};
CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {{db.tablespace.search_index}};
-drop table if exists placex;
+--------- PLACEX - storage for all indexed places -----------------
+
+DROP TABLE IF EXISTS placex;
CREATE TABLE placex (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326)
) {{db.tablespace.search_data}};
+
CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}};
-CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tablespace.search_index}};
-CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
-CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
+{% for osm_type in ('N', 'W', 'R') %}
+CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex
+ USING BTREE (osm_id) {{db.tablespace.search_index}}
+ WHERE osm_type = '{{osm_type}}';
+{% endfor %}
+
+-- Usage: - removing linkage status on update
+-- - lookup linked places for /details
+CREATE INDEX idx_placex_linked_place_id ON placex
+ USING BTREE (linked_place_id) {{db.tablespace.address_index}}
+ WHERE linked_place_id IS NOT NULL;
+
+-- Usage: - check that admin boundaries do not overtake each other rank-wise
+-- - check that place node in a admin boundary with the same address level
+-- - boundary is not completely contained in a place area
+-- - parenting of large-area or unparentable features
+CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
+ USING gist (geometry) {{db.tablespace.address_index}}
+ WHERE rank_address between 1 and 25
+ and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
+
+-- Usage: - POI is within building with housenumber
CREATE INDEX idx_placex_geometry_buildings ON placex
- USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.search_index}}
+ USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
WHERE address is not null and rank_search = 30
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
+
+-- Usage: - linking of similar named places to boundaries
+-- - linking of place nodes with same type to boundaries
+-- - lookupPolygon()
CREATE INDEX idx_placex_geometry_placenode ON placex
- USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.search_index}}
+ USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'N' and rank_search < 26
- and class = 'place' and type != 'postcode' and linked_place_id is null;
-CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26;
+ and class = 'place' and type != 'postcode';
+
+-- Usage: - is node part of a way?
+-- - find parent of interpolation spatially
+CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
+ USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
+ WHERE osm_type = 'W' and rank_search >= 26;
+
+-- Usage: - linking place nodes by wikidata tag to boundaries
+CREATE INDEX idx_placex_wikidata on placex
+ USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}}
+ WHERE extratags ? 'wikidata' and class = 'place'
+ and osm_type = 'N' and rank_search < 26;
+
+-- The following two indexes function as a todo list for indexing.
+
+CREATE INDEX idx_placex_rank_address_sector ON placex
+ USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
+ WHERE indexed_status > 0;
+
+CREATE INDEX idx_placex_rank_boundaries_sector ON placex
+ USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
+ WHERE class = 'boundary' and type = 'administrative'
+ and indexed_status > 0;
+
DROP SEQUENCE IF EXISTS seq_place;
CREATE SEQUENCE seq_place start 1;
import psutil
from nominatim.config import Configuration
-from nominatim.db.connection import connect, Connection
+from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.tokenizer.base import AbstractTokenizer
from nominatim.version import version_str
from ..tools import database_import, refresh, postcodes, freeze
from ..indexer.indexer import Indexer
+ num_threads = args.threads or psutil.cpu_count() or 1
+
country_info.setup_country_config(args.config)
if args.continue_at is None:
database_import.truncate_data_tables(conn)
LOG.warning('Load data into placex table')
- database_import.load_data(args.config.get_libpq_dsn(),
- args.threads or psutil.cpu_count() or 1)
+ database_import.load_data(args.config.get_libpq_dsn(), num_threads)
LOG.warning("Setting up tokenizer")
tokenizer = self._get_tokenizer(args.continue_at, args.config)
args.project_dir, tokenizer)
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
- if args.continue_at is not None and args.continue_at != 'load-data':
- with connect(args.config.get_libpq_dsn()) as conn:
- self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
- indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
- args.threads or psutil.cpu_count() or 1)
+ indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
indexer.index_full(analyse=not args.index_noanalyse)
LOG.warning('Post-process tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.create_search_indices(conn, args.config,
- drop=args.no_updates)
+ drop=args.no_updates,
+ threads=num_threads)
LOG.warning('Create search index for default country names.')
country_info.create_country_names(conn, tokenizer,
args.config.get_str_list('LANGUAGES'))
return tokenizer_factory.get_tokenizer_for_db(config)
- def _create_pending_index(self, conn: Connection, tablespace: str) -> None:
- """ Add a supporting index for finding places still to be indexed.
-
- This index is normally created at the end of the import process
- for later updates. When indexing was partially done, then this
- index can greatly improve speed going through already indexed data.
- """
- if conn.index_exists('idx_placex_pendingsector'):
- return
-
- with conn.cursor() as cur:
- LOG.warning('Creating support index')
- if tablespace:
- tablespace = 'TABLESPACE ' + tablespace
- cur.execute(f"""CREATE INDEX idx_placex_pendingsector
- ON placex USING BTREE (rank_address,geometry_sector)
- {tablespace} WHERE indexed_status > 0
- """)
- conn.commit()
-
-
def _finalize_database(self, dsn: str, offline: bool) -> None:
""" Determine the database date and set the status accordingly.
"""
# Use a dict to hand in the parameters because async is a reserved
# word in Python3.
- self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True})
+ self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True}) # type: ignore
+ assert self.conn
self.wait()
if cursor_factory is not None:
try:
conn = psycopg2.connect(dsn, connection_factory=Connection)
ctxmgr = cast(ConnectionContext, contextlib.closing(conn))
- ctxmgr.connection = cast(Connection, conn)
+ ctxmgr.connection = conn
return ctxmgr
except psycopg2.OperationalError as err:
raise UsageError(f"Cannot connect to database: {err}") from err
import jinja2
from nominatim.db.connection import Connection
+from nominatim.db.async_connection import WorkerPool
from nominatim.config import Configuration
def _get_partitions(conn: Connection) -> Set[int]:
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()
+
+
+ def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
+ **kwargs: Any) -> None:
+ """ Execure the given SQL files using parallel asynchronous connections.
+ The keyword arguments may supply additional parameters for
+ preprocessing.
+
+ After preprocessing the SQL code is cut at lines containing only
+ '---'. Each chunk is sent to one of the `num_threads` workers.
+ """
+ sql = self.env.get_template(name).render(**kwargs)
+
+ parts = sql.split('\n---\n')
+
+ with WorkerPool(dsn, num_threads) as pool:
+ for part in parts:
+ pool.next_free_worker().perform(part)
indexes.extend(('idx_placex_housenumber',
'idx_osmline_parent_osm_id_with_hnr'))
if conn.table_exists('place'):
- indexes.extend(('idx_placex_pendingsector',
- 'idx_location_area_country_place_id',
- 'idx_place_osm_unique'))
+ indexes.extend(('idx_location_area_country_place_id',
+ 'idx_place_osm_unique',
+ 'idx_placex_rank_address_sector',
+ 'idx_placex_rank_boundaries_sector'))
return indexes
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
""" Checking for wikipedia/wikidata data
"""
- if not conn.table_exists('search_name'):
+ if not conn.table_exists('search_name') or not conn.table_exists('place'):
return CheckState.NOT_APPLICABLE
with conn.cursor() as cur:
WHERE pg_index.indisvalid = false
AND pg_index.indexrelid = pg_class.oid""")
- broken = list(cur)
+ broken = [c[0] for c in cur]
if broken:
return CheckState.FAIL, dict(indexes='\n '.join(broken))
cur.execute('ANALYSE')
-def create_search_indices(conn: Connection, config: Configuration, drop: bool = False) -> None:
+def create_search_indices(conn: Connection, config: Configuration,
+ drop: bool = False, threads: int = 1) -> None:
""" Create tables that have explicit partitioning.
"""
sql = SQLPreprocessor(conn, config)
- sql.run_sql_file(conn, 'indices.sql', drop=drop)
+ sql.run_parallel_sql_file(config.get_libpq_dsn(),
+ 'indices.sql', min(8, threads), drop=drop)
for mock in mocks:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
- assert temp_db_conn.index_exists('idx_placex_pendingsector')
-
# Calling it again still works for the index
assert self.call_nominatim('import', '--continue', 'indexing') == 0
- assert temp_db_conn.index_exists('idx_placex_pendingsector')
def test_import_continue_postprocess(self, mock_func_factory):