run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev
if [ "x$UBUNTUVER" == "x18" ]; then
- pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium PyYAML==5.1 datrie
+ pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 datrie
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
fi
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
with:
submodules: true
- - uses: actions/cache@v2
+ - uses: actions/cache@v3
with:
path: |
data/country_osm_grid.sql.gz
mv nominatim-src.tar.bz2 Nominatim
- name: 'Upload Artifact'
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
name: full-source
path: nominatim-src.tar.bz2
pytest: py.test-3
php: 7.4
- ubuntu: 22
- postgresql: 14
+ postgresql: 15
postgis: 3
pytest: py.test-3
php: 8.1
runs-on: ubuntu-${{ matrix.ubuntu }}.04
steps:
- - uses: actions/download-artifact@v2
+ - uses: actions/download-artifact@v3
with:
name: full-source
tools: phpunit, phpcs, composer
ini-values: opcache.jit=disable
- - uses: actions/setup-python@v2
+ - uses: actions/setup-python@v4
with:
python-version: 3.6
if: matrix.ubuntu == 18
if: matrix.ubuntu == 22
- name: Install latest pylint/mypy
- run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil typing-extensions
+ run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests typing-extensions
- name: PHP linting
run: phpcs --report-width=120 .
runs-on: ubuntu-20.04
steps:
- - uses: actions/download-artifact@v2
+ - uses: actions/download-artifact@v3
with:
name: full-source
OS: ${{ matrix.name }}
INSTALL_MODE: ${{ matrix.install_mode }}
- - uses: actions/download-artifact@v2
+ - uses: actions/download-artifact@v3
with:
name: full-source
path: /home/nominatim
``` ini
; Replace the tcp listener and add the unix socket
-listen = /var/run/php-fpm.sock
+listen = /var/run/php-fpm-nominatim.sock
; Ensure that the daemon runs as the correct user
listen.owner = www-data
fastcgi_param SCRIPT_FILENAME "$document_root$uri.php";
fastcgi_param PATH_TRANSLATED "$document_root$uri.php";
fastcgi_param QUERY_STRING $args;
- fastcgi_pass unix:/var/run/php-fpm.sock;
+ fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index index.php;
include fastcgi_params;
}
if (!-f $document_root$fastcgi_script_name) {
return 404;
}
- fastcgi_pass unix:/var/run/php-fpm.sock;
+ fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index search.php;
include fastcgi.conf;
}
The file is about 400MB and adds around 4GB to the Nominatim database.
!!! tip
- If you forgot to download the wikipedia rankings, you can also add
- importances after the import. Download the files, then run
- `nominatim refresh --wiki-data --importance`. Updating importances for
- a planet can take a couple of hours.
+ If you forgot to download the wikipedia rankings, then you can
+ also add importances after the import. Download the SQL files, then
+ run `nominatim refresh --wiki-data --importance`. Updating
+ importances for a planet will take a couple of hours.
### External postcodes
database or reuse the space later.
!!! warning
- The datastructure for updates are also required when adding additional data
+ The data structures for updates are also required when adding additional data
after the import, for example [TIGER housenumber data](../customize/Tiger.md).
If you plan to use those, you must not use the `--no-updates` parameter.
Do a normal import, add the external data and once you are done with
Possible status codes are
- | | message | notes |
- |-----|----------------------|---------------------------------------------------|
- | 700 | "No database" | connection failed |
- | 701 | "Module failed" | database could not load nominatim.so |
- | 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
- | 703 | "Query failed" | test query against a database table failed |
- | 704 | "No value" | test query worked but returned no results |
+| | message | notes |
+| --- | ------------------------------ | ----------------------------------------------------------------- |
+| 700 | "No database" | connection failed |
+| 701 | "Module failed" | database could not load nominatim.so |
+| 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
+| 703 | "Query failed" | test query against a database table failed |
+| 704 | "No value" | test query worked but returned no results |
+| 705 | "Import date is not available" | No import dates were returned (enabling replication can fix this) |
--- /dev/null
+## Importance
+
+Search requests can yield multiple results that match the original query
+equally well. In such a case Nominatim needs to order the results
+according to a different criterion: importance. This is a measure of how
+likely it is that a user will search for a given place. This section explains
+the sources Nominatim uses for computing importance of a place and how to
+customize them.
+
+### How importance is computed
+
+The main value for importance is derived from page ranking values for Wikipedia
+pages for a place. For places that do not have their own
+Wikipedia page, a formula is used that derives a static importance from the
+place's [search rank](../customize/Ranking.md#search-rank).
+
+In a second step, a secondary importance value is added which is meant to
+represent how well-known the general area is where the place is located. It
+functions as a tie-breaker between places with very similar primary
+importance values.
+
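+As a rough illustration, the fallback formula currently used looks like this
+(taken from the `compute_importance()` SQL function):
+
+```
+-- no Wikipedia article known: derive a static importance from the search rank,
+-- e.g. rank 30 (a house) yields ~0.0, rank 4 (a country) yields ~0.65
+result.importance := 0.75001 - (rank_search::float / 40);
+```
+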
+nominatim.org has preprocessed importance tables for the
+[primary Wikipedia rankings](https://nominatim.org/data/wikimedia-importance.sql.gz)
+and for a secondary importance based on the number of tile views on openstreetmap.org.
+
+### Customizing secondary importance
+
+The secondary importance is implemented as a simple
+[PostGIS raster](https://postgis.net/docs/raster.html) table, where Nominatim
+looks up the value for the coordinates of the centroid of a place. You can
+provide your own secondary importance raster in the form of an SQL file named
+`secondary_importance.sql.gz` in your project directory.
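+
+For illustration, the lookup roughly corresponds to the following query
+(a simplified sketch of what the importance function does internally;
+`centroid` stands for the point geometry of the place being ranked):
+
+```
+SELECT ST_Value(rast, centroid)
+  FROM secondary_importance
+ WHERE ST_Intersects(ST_ConvexHull(rast), centroid)
+ LIMIT 1;
+```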
+
+The SQL file needs to drop and (re)create a table `secondary_importance` which
+must at a minimum contain a column `rast` of type `raster`. The raster must
+be in EPSG:4326 and contain 16-bit unsigned integers
+(`raster_constraint_pixel_types(rast) = '{16BUI}'`). Any other columns in the
+table will be ignored. You must furthermore create an index as follows:
+
+```
+CREATE INDEX ON secondary_importance USING gist(ST_ConvexHull(rast))
+```
+
+The following raster2pgsql command will create a table that conforms to
+the requirements:
+
+```
+raster2pgsql -I -C -Y -d -t 128x128 input.tiff public.secondary_importance
+```
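+
+To sanity-check an imported raster against these requirements, a query along
+these lines can be used (a sketch; `ST_SRID` and `ST_BandPixelType` are
+standard PostGIS raster functions):
+
+```
+SELECT ST_SRID(rast), ST_BandPixelType(rast, 1)
+  FROM secondary_importance
+ LIMIT 1;   -- expect 4326 and '16BUI'
+```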
TIGER data to your own Nominatim instance by following these steps. The
entire US adds about 10GB to your database.
- 1. Get preprocessed TIGER 2021 data:
+ 1. Get preprocessed TIGER data:
cd $PROJECT_DIR
- wget https://nominatim.org/data/tiger2021-nominatim-preprocessed.csv.tar.gz
+ wget https://nominatim.org/data/tiger-nominatim-preprocessed-latest.csv.tar.gz
2. Import the data into your Nominatim database:
- nominatim add-data --tiger-data tiger2021-nominatim-preprocessed.csv.tar.gz
+ nominatim add-data --tiger-data tiger-nominatim-preprocessed-latest.csv.tar.gz
3. Enable use of the Tiger data in your `.env` by adding:
- 'Configuration Settings': 'customize/Settings.md'
- 'Per-Country Data': 'customize/Country-Settings.md'
- 'Place Ranking' : 'customize/Ranking.md'
+ - 'Importance' : 'customize/Importance.md'
- 'Tokenizers' : 'customize/Tokenizers.md'
- 'Special Phrases': 'customize/Special-Phrases.md'
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
$sSQL .= ' ST_Distance(linegeo,'.$sPointSQL.') as distance';
$sSQL .= ' FROM location_property_osmline';
$sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', linegeo, '.$fSearchDiam.')';
- $sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
+ $sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
+ $sSQL .= ' and parent_place_id != 0';
$sSQL .= ' ORDER BY distance ASC limit 1';
Debug::printSQL($sSQL);
CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
country_code varchar(2),
- osm_type varchar(1), osm_id BIGINT)
+ rank_search SMALLINT,
+ centroid GEOMETRY)
RETURNS place_importance
AS $$
DECLARE
match RECORD;
result place_importance;
+ osm_views_exists BIGINT;
+ views BIGINT;
BEGIN
- FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code)
- WHERE language is not NULL
+ -- add importance by wikipedia article if the place has one
+ FOR match IN
+ SELECT * FROM get_wikipedia_match(extratags, country_code)
+ WHERE language is not NULL
LOOP
result.importance := match.importance;
result.wikipedia := match.language || ':' || match.title;
RETURN result;
END LOOP;
- IF extratags ? 'wikidata' THEN
+ -- Nothing? Then try with the wikidata tag.
+ IF result.importance is null AND extratags ? 'wikidata' THEN
FOR match IN SELECT * FROM wikipedia_article
WHERE wd_page_title = extratags->'wikidata'
- ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP
+ ORDER BY language = 'en' DESC, langcount DESC LIMIT 1
+ LOOP
result.importance := match.importance;
result.wikipedia := match.language || ':' || match.title;
RETURN result;
END LOOP;
END IF;
- RETURN null;
+ -- Still nothing? Fall back to a default.
+ IF result.importance is null THEN
+ result.importance := 0.75001 - (rank_search::float / 40);
+ END IF;
+
+{% if 'secondary_importance' in db.tables %}
+ FOR match IN
+ SELECT ST_Value(rast, centroid) as importance
+ FROM secondary_importance
+ WHERE ST_Intersects(ST_ConvexHull(rast), centroid) LIMIT 1
+ LOOP
+ -- Secondary importance as tie breaker with 0.0001 weight.
+ result.importance := result.importance + match.importance::float / 655350000;
+ END LOOP;
+{% endif %}
+
+ RETURN result;
END;
$$
LANGUAGE plpgsql;
NEW.importance := null;
SELECT wikipedia, importance
- FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id)
+ FROM compute_importance(NEW.extratags, NEW.country_code, NEW.rank_search, NEW.centroid)
INTO NEW.wikipedia,NEW.importance;
{% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
IF linked_place is not null THEN
-- Recompute the ranks here as the ones from the linked place might
-- have been shifted to accommodate surrounding boundaries.
- SELECT place_id, osm_id, class, type, extratags,
+ SELECT place_id, osm_id, class, type, extratags, rank_search,
centroid, geometry,
(compute_place_rank(country_code, osm_type, class, type, admin_level,
(extratags->'capital') = 'yes', null)).*
SELECT wikipedia, importance
FROM compute_importance(location.extratags, NEW.country_code,
- 'N', location.osm_id)
+ location.rank_search, NEW.centroid)
INTO linked_wikipedia,linked_importance;
-- Use the maximum importance if one could be computed from the linked object.
-- null table so it won't error
-- deliberately no drop - importing the table is expensive and static, if it is already there better to avoid removing it
-CREATE TABLE wikipedia_article (
+CREATE TABLE IF NOT EXISTS wikipedia_article (
language text NOT NULL,
title text NOT NULL,
langcount integer,
wd_page_title text,
instance_of text
);
-ALTER TABLE ONLY wikipedia_article ADD CONSTRAINT wikipedia_article_pkey PRIMARY KEY (language, title);
-CREATE INDEX idx_wikipedia_article_osm_id ON wikipedia_article USING btree (osm_type, osm_id);
-CREATE TABLE wikipedia_redirect (
+CREATE TABLE IF NOT EXISTS wikipedia_redirect (
language text,
from_title text,
to_title text
);
-ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMARY KEY (language, from_title);
-- osm2pgsql does not create indexes on the middle tables for Nominatim
-- Add one for lookup of associated street relations.
# just use the pgxs makefile
-foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "14" "13" "12" "11" "10" "9.6")
+foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "15" "14" "13" "12" "11" "10" "9.6")
list(APPEND PG_CONFIG_HINTS
"/usr/pgsql-${suffix}/bin")
endforeach()
address_levels: bool
functions: bool
wiki_data: bool
+ secondary_importance: bool
importance: bool
website: bool
diffs: bool
help='Update the PL/pgSQL functions in the database')
group.add_argument('--wiki-data', action='store_true',
help='Update Wikipedia/data importance numbers')
+ group.add_argument('--secondary-importance', action='store_true',
+ help='Update secondary importance raster data')
group.add_argument('--importance', action='store_true',
help='Recompute place importances (expensive!)')
group.add_argument('--website', action='store_true',
help='Enable debug warning statements in functions')
- def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches
+ def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
from ..tools import refresh, postcodes
from ..indexer.indexer import Indexer
with connect(args.config.get_libpq_dsn()) as conn:
refresh.load_address_levels_from_config(conn, args.config)
+ # Attention: must come BEFORE functions
+ if args.secondary_importance:
+ with connect(args.config.get_libpq_dsn()) as conn:
+ # If the table did not exist before, then the importance code
+ # needs to be enabled.
+ if not conn.table_exists('secondary_importance'):
+ args.functions = True
+
+ LOG.warning('Import secondary importance raster data from %s', args.project_dir)
+ if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+ args.project_dir) > 0:
+ LOG.fatal('FATAL: Cannot update secondary importance raster data')
+ return 1
+
if args.functions:
LOG.warning('Create functions')
with connect(args.config.get_libpq_dsn()) as conn:
LOG.warning("Initialising replication updates")
with connect(args.config.get_libpq_dsn()) as conn:
- replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
+ replication.init_replication(conn, base_url=args.config.REPLICATION_URL,
+ socket_timeout=args.socket_timeout)
if args.update_functions:
LOG.warning("Create functions")
refresh.create_functions(conn, args.config, True, False)
from ..tools import replication
with connect(args.config.get_libpq_dsn()) as conn:
- return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
+ return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL,
+ socket_timeout=args.socket_timeout)
def _report_update(self, batchdate: dt.datetime,
while True:
with connect(args.config.get_libpq_dsn()) as conn:
start = dt.datetime.now(dt.timezone.utc)
- state = replication.update(conn, params)
+ state = replication.update(conn, params, socket_timeout=args.socket_timeout)
if state is not replication.UpdateState.NO_CHANGES:
status.log_status(conn, start, 'import')
batchdate, _, _ = status.get_status(conn)
help="Do not keep tables that are only needed for "
"updating the database later")
group2.add_argument('--offline', action='store_true',
- help="Do not attempt to load any additional data from the internet")
+ help="Do not attempt to load any additional data from the internet")
group3 = parser.add_argument_group('Expert options')
group3.add_argument('--ignore-errors', action='store_true',
help='Continue import even when errors in SQL are present')
drop=args.no_updates,
ignore_errors=args.ignore_errors)
- self._setup_tables(args.config, args.reverse_only)
-
LOG.warning('Importing wikipedia importance data')
data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.error('Wikipedia importance dump file not found. '
- 'Will be using default importances.')
+ 'Calculating importance values of locations will not '
+ 'use Wikipedia importance data.')
+
+ LOG.warning('Importing secondary importance raster data')
+ if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+ args.project_dir) != 0:
+ LOG.error('Secondary importance file not imported. '
+ 'Falling back to default ranking.')
+
+ self._setup_tables(args.config, args.reverse_only)
if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Initialise tables')
with conn.cursor() as cur:
cur.execute('ANALYZE')
- self.index_by_rank(0, 4)
- _analyze()
+ if self.index_by_rank(0, 4) > 0:
+ _analyze()
- self.index_boundaries(0, 30)
- _analyze()
+ if self.index_boundaries(0, 30) > 100:
+ _analyze()
- self.index_by_rank(5, 25)
- _analyze()
+ if self.index_by_rank(5, 25) > 100:
+ _analyze()
- self.index_by_rank(26, 30)
- _analyze()
+ if self.index_by_rank(26, 30) > 1000:
+ _analyze()
- self.index_postcodes()
- _analyze()
+ if self.index_postcodes() > 100:
+ _analyze()
- def index_boundaries(self, minrank: int, maxrank: int) -> None:
+ def index_boundaries(self, minrank: int, maxrank: int) -> int:
""" Index only administrative boundaries within the given rank range.
"""
+ total = 0
LOG.warning("Starting indexing boundaries using %s threads",
self.num_threads)
with self.tokenizer.name_analyzer() as analyzer:
for rank in range(max(minrank, 4), min(maxrank, 26)):
- self._index(runners.BoundaryRunner(rank, analyzer))
+ total += self._index(runners.BoundaryRunner(rank, analyzer))
- def index_by_rank(self, minrank: int, maxrank: int) -> None:
+ return total
+
+ def index_by_rank(self, minrank: int, maxrank: int) -> int:
""" Index all entries of placex in the given rank range (inclusive)
in order of their address rank.
When rank 30 is requested then also interpolations and
places with address rank 0 will be indexed.
"""
+ total = 0
maxrank = min(maxrank, 30)
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
minrank, maxrank, self.num_threads)
with self.tokenizer.name_analyzer() as analyzer:
for rank in range(max(1, minrank), maxrank + 1):
- self._index(runners.RankRunner(rank, analyzer), 20 if rank == 30 else 1)
+ total += self._index(runners.RankRunner(rank, analyzer), 20 if rank == 30 else 1)
if maxrank == 30:
- self._index(runners.RankRunner(0, analyzer))
- self._index(runners.InterpolationRunner(analyzer), 20)
+ total += self._index(runners.RankRunner(0, analyzer))
+ total += self._index(runners.InterpolationRunner(analyzer), 20)
+
+ return total
- def index_postcodes(self) -> None:
+ def index_postcodes(self) -> int:
"""Index the entries of the location_postcode table.
"""
LOG.warning("Starting indexing postcodes using %s threads", self.num_threads)
- self._index(runners.PostcodeRunner(), 20)
+ return self._index(runners.PostcodeRunner(), 20)
def update_status_table(self) -> None:
conn.commit()
- def _index(self, runner: runners.Runner, batch: int = 1) -> None:
+ def _index(self, runner: runners.Runner, batch: int = 1) -> int:
""" Index a single rank or table. `runner` describes the SQL to use
for indexing. `batch` describes the number of objects that
should be processed with a single SQL statement
conn.commit()
- progress.done()
+ return progress.done()
self.next_info += int(places_per_sec) * self.log_interval
- def done(self) -> None:
+ def done(self) -> int:
""" Print final statistics about the progress.
"""
rank_end_time = datetime.now()
LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
self.done_places, self.total_places, int(diff_seconds),
places_per_sec, self.name)
+
+ return self.done_places
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
+
+ postgis_version = conn.postgis_version_tuple()
+ if postgis_version[0] >= 3:
+ cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
+
conn.commit()
_require_version('PostGIS',
from psycopg2 import sql as pysql
from nominatim.config import Configuration
-from nominatim.db.connection import Connection
+from nominatim.db.connection import Connection, connect
from nominatim.db.utils import execute_file
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.version import version_str
return 0
+def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
+ """ Replaces the secondary importance raster data table with new data.
+
+ Returns 0 if all was well, 1 if the raster SQL file could not be
+ found and 2 if the PostGIS version is too old for raster support.
+ Throws an exception if there was an error reading the file.
+ """
+ datafile = data_path / 'secondary_importance.sql.gz'
+ if not datafile.exists():
+ return 1
+
+ with connect(dsn) as conn:
+ postgis_version = conn.postgis_version_tuple()
+ if postgis_version[0] < 3:
+ LOG.error('PostGIS version is too old for using OSM raster data.')
+ return 2
+
+ execute_file(dsn, datafile, ignore_errors=ignore_errors)
+
+ return 0
def recompute_importance(conn: Connection) -> None:
""" Recompute wikipedia links and importance for all entries in placex.
cur.execute("""
UPDATE placex SET (wikipedia, importance) =
(SELECT wikipedia, importance
- FROM compute_importance(extratags, country_code, osm_type, osm_id))
+ FROM compute_importance(extratags, country_code, osm_type, osm_id, centroid))
""")
cur.execute("""
UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
"""
Functions for updating a database from a replication source.
"""
-from typing import ContextManager, MutableMapping, Any, Generator, cast
+from typing import ContextManager, MutableMapping, Any, Generator, cast, Iterator
from contextlib import contextmanager
import datetime as dt
from enum import Enum
import logging
import time
+import types
+import urllib.request as urlrequest
+import requests
from nominatim.db import status
from nominatim.db.connection import Connection
from nominatim.tools.exec_utils import run_osm2pgsql
try:
from osmium.replication.server import ReplicationServer
from osmium import WriteHandler
+ from osmium import version as pyo_version
except ImportError as exc:
logging.getLogger().critical("pyosmium not installed. Replication functions not available.\n"
"To install pyosmium via pip: pip3 install osmium")
LOG = logging.getLogger()
-def init_replication(conn: Connection, base_url: str) -> None:
+def init_replication(conn: Connection, base_url: str,
+ socket_timeout: int = 60) -> None:
""" Set up replication for the server at the given base URL.
"""
LOG.info("Using replication source: %s", base_url)
# margin of error to make sure we get all data
date -= dt.timedelta(hours=3)
- repl = ReplicationServer(base_url)
-
- seq = repl.timestamp_to_sequence(date)
+ with _make_replication_server(base_url, socket_timeout) as repl:
+ seq = repl.timestamp_to_sequence(date)
if seq is None:
LOG.fatal("Cannot reach the configured replication service '%s'.\n"
LOG.warning("Updates initialised at sequence %s (%s)", seq, date)
-def check_for_updates(conn: Connection, base_url: str) -> int:
+def check_for_updates(conn: Connection, base_url: str,
+ socket_timeout: int = 60) -> int:
""" Check if new data is available from the replication service at the
given base URL.
"""
"Please run 'nominatim replication --init' first.")
return 254
- state = ReplicationServer(base_url).get_state_info()
+ with _make_replication_server(base_url, socket_timeout) as repl:
+ state = repl.get_state_info()
if state is None:
LOG.error("Cannot get state for URL %s.", base_url)
NO_CHANGES = 3
-def update(conn: Connection, options: MutableMapping[str, Any]) -> UpdateState:
+def update(conn: Connection, options: MutableMapping[str, Any],
+ socket_timeout: int = 60) -> UpdateState:
""" Update database from the next batch of data. Returns the state of
updates according to `UpdateState`.
"""
options['import_file'].unlink()
# Read updates into file.
- with _make_replication_server(options['base_url']) as repl:
+ with _make_replication_server(options['base_url'], socket_timeout) as repl:
outhandler = WriteHandler(str(options['import_file']))
endseq = repl.apply_diffs(outhandler, startseq + 1,
max_size=options['max_diff_size'] * 1024)
return UpdateState.UP_TO_DATE
-def _make_replication_server(url: str) -> ContextManager[ReplicationServer]:
+def _make_replication_server(url: str, timeout: int) -> ContextManager[ReplicationServer]:
""" Returns a ReplicationServer in form of a context manager.
Creates a light wrapper around older versions of pyosmium that did
not support the context manager interface.
"""
if hasattr(ReplicationServer, '__enter__'):
- return cast(ContextManager[ReplicationServer], ReplicationServer(url))
+ # Patches the open_url function for pyosmium >= 3.2
+ # where the socket timeout is no longer respected.
+ def patched_open_url(self: ReplicationServer, url: urlrequest.Request) -> Any:
+ """ Download a resource from the given URL and return a byte sequence
+ of the content.
+ """
+ get_params = {
+ 'headers': {"User-Agent" : f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"},
+ 'timeout': timeout or None,
+ 'stream': True
+ }
+
+ if self.session is not None:
+ return self.session.get(url.get_full_url(), **get_params)
+
+ @contextmanager
+ def _get_url_with_session() -> Iterator[requests.Response]:
+ with requests.Session() as session:
+ request = session.get(url.get_full_url(), **get_params) # type: ignore
+ yield request
+
+ return _get_url_with_session()
+
+ repl = ReplicationServer(url)
+ repl.open_url = types.MethodType(patched_open_url, repl)
+
+ return cast(ContextManager[ReplicationServer], repl)
@contextmanager
def get_cm() -> Generator[ReplicationServer, None, None]:
-Subproject commit b0352aa8f15e2739ba36d72561854a2738123770
+Subproject commit 6a5d2500e9689f55485d186306aadc55560085fd
| 0 |
Then there are duplicates
+ @fail-legacy
Scenario: Search with bounded viewbox in right area
- When sending json search query "bar" with address
+ When sending json search query "post" with address
| bounded | viewbox |
| 1 | 9,47,10,48 |
Then result addresses contain
| ID | town |
| 0 | Vaduz |
- When sending json search query "bar" with address
+ When sending json search query "post" with address
| bounded | viewbox |
| 1 | 9.49712,47.17122,9.52605,47.16242 |
Then result addresses contain
Then result has centroid in 9.49712,47.16242,9.52605,47.17122
Scenario: Prefer results within viewbox
- When sending json search query "Gässle" with address
- | accept-language |
- | en |
- Then result addresses contain
- | ID | town |
- | 0 | Balzers |
When sending json search query "Gässle" with address
| accept-language | viewbox |
| en | 9.52413,47.10759,9.53140,47.10539 |
Then result addresses contain
| ID | village |
| 0 | Triesen |
+ When sending json search query "Gässle" with address
+ | accept-language | viewbox |
+ | en | 9.45949,47.08421,9.54094,47.05466 |
+ Then result addresses contain
+ | ID | town |
+ | 0 | Balzers |
Scenario: viewboxes cannot be points
When sending json search query "foo"
Scenario: Limit number of search results
When sending json search query "landstr"
+ | dedupe |
+ | 0 |
Then more than 4 results are returned
When sending json search query "landstr"
- | limit |
- | 4 |
+ | limit | dedupe |
+ | 4 | 0 |
Then exactly 4 results are returned
Scenario: Limit parameter must be a number
self.api_db_done = True
if not self._reuse_or_drop_db(self.api_test_db):
- testdata = Path('__file__') / '..' / '..' / 'testdb'
- self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
+ testdata = (Path(__file__) / '..' / '..' / '..' / 'testdb').resolve()
+ self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata)
+ simp_file = Path(self.website_dir.name) / 'secondary_importance.sql.gz'
+ simp_file.symlink_to(testdata / 'secondary_importance.sql.gz')
try:
self.run_nominatim('import', '--osm-file', str(self.api_test_file))
- self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
+ self.run_nominatim('add-data', '--tiger-data', str(testdata / 'tiger'))
self.run_nominatim('freeze')
if self.tokenizer == 'legacy':
- phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
+ phrase_file = str(testdata / 'specialphrases_testdb.sql')
run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
else:
- csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
+ csv_path = str(testdata / 'full_en_phrases_test.csv')
self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
except:
self.db_drop_database(self.api_test_db)
mock_func_factory(nominatim.data.country_info, 'setup_country_tables'),
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
+ mock_func_factory(nominatim.tools.refresh, 'import_secondary_importance'),
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim.tools.database_import, 'load_data'),
mock_func_factory(nominatim.tools.database_import, 'create_tables'),
assert self.call_nominatim('refresh', '--wiki-data') == 1
+ def test_refresh_secondary_importance_file_not_found(self):
+ assert self.call_nominatim('refresh', '--secondary-importance') == 1
+
+
+ def test_refresh_secondary_importance_new_table(self, mock_func_factory):
+ mocks = [mock_func_factory(nominatim.tools.refresh, 'import_secondary_importance'),
+ mock_func_factory(nominatim.tools.refresh, 'create_functions')]
+
+ assert self.call_nominatim('refresh', '--secondary-importance') == 0
+ assert mocks[0].called == 1
+ assert mocks[1].called == 1
+
def test_refresh_importance_computed_after_wiki_import(self, monkeypatch):
calls = []
assert refresh.import_wikipedia_articles(dsn, Path('.')) == 1
+def test_refresh_import_secondary_importance_non_existing(dsn):
+ assert refresh.import_secondary_importance(dsn, Path('.')) == 1
+
+def test_refresh_import_secondary_importance_testdb(dsn, src_dir, temp_db_conn, temp_db_cursor):
+ temp_db_cursor.execute('CREATE EXTENSION postgis')
+
+ if temp_db_conn.postgis_version_tuple()[0] < 3:
+ assert refresh.import_secondary_importance(dsn, src_dir / 'test' / 'testdb') > 0
+ else:
+ temp_db_cursor.execute('CREATE EXTENSION postgis_raster')
+ assert refresh.import_secondary_importance(dsn, src_dir / 'test' / 'testdb') == 0
+
+ assert temp_db_conn.table_exists('secondary_importance')
+
+
@pytest.mark.parametrize("replace", (True, False))
def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, replace):
if replace:
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
country_code varchar(2),
osm_type varchar(1), osm_id BIGINT,
+ centroid GEOMETRY,
OUT importance FLOAT,
OUT wikipedia TEXT)
AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")
sudo tee /etc/php/7.2/fpm/pool.d/www.conf << EOF_PHP_FPM_CONF
[www]
; Replace the tcp listener and add the unix socket
-listen = /var/run/php7.2-fpm.sock
+listen = /var/run/php-fpm-nominatim.sock
; Ensure that the daemon runs as the correct user
listen.owner = www-data
fastcgi_param SCRIPT_FILENAME "\$document_root\$uri.php";
fastcgi_param PATH_TRANSLATED "\$document_root\$uri.php";
fastcgi_param QUERY_STRING \$args;
- fastcgi_pass unix:/var/run/php7.2-fpm.sock;
+ fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index index.php;
include fastcgi_params;
}
if (!-f \$document_root\$fastcgi_script_name) {
return 404;
}
- fastcgi_pass unix:/var/run/php7.2-fpm.sock;
+ fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index search.php;
include fastcgi.conf;
}
sudo tee /etc/php/7.4/fpm/pool.d/www.conf << EOF_PHP_FPM_CONF
[www]
; Replace the tcp listener and add the unix socket
-listen = /var/run/php7.4-fpm.sock
+listen = /var/run/php-fpm-nominatim.sock
; Ensure that the daemon runs as the correct user
listen.owner = www-data
fastcgi_param SCRIPT_FILENAME "\$document_root\$uri.php";
fastcgi_param PATH_TRANSLATED "\$document_root\$uri.php";
fastcgi_param QUERY_STRING \$args;
- fastcgi_pass unix:/var/run/php7.4-fpm.sock;
+ fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index index.php;
include fastcgi_params;
}
if (!-f \$document_root\$fastcgi_script_name) {
return 404;
}
- fastcgi_pass unix:/var/run/php7.4-fpm.sock;
+ fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index search.php;
include fastcgi.conf;
}
EOF_NGINX_CONF
#DOCS:```
-# If you have some errors, make sure that php7.4-fpm.sock is well under
+# If you have some errors, make sure that php-fpm-nominatim.sock is well under
# /var/run/ and not under /var/run/php. Otherwise change the Nginx configuration
-# to /var/run/php/php7.4-fpm.sock.
+# to /var/run/php/php-fpm-nominatim.sock.
#
# Enable the configuration and restart Nginx
#
sudo tee /etc/php/8.1/fpm/pool.d/www.conf << EOF_PHP_FPM_CONF
[www]
; Replace the tcp listener and add the unix socket
-listen = /var/run/php8.1-fpm.sock
+listen = /var/run/php-fpm-nominatim.sock
; Ensure that the daemon runs as the correct user
listen.owner = www-data
fastcgi_param SCRIPT_FILENAME "\$document_root\$uri.php";
fastcgi_param PATH_TRANSLATED "\$document_root\$uri.php";
fastcgi_param QUERY_STRING \$args;
- fastcgi_pass unix:/var/run/php8.1-fpm.sock;
+ fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index index.php;
include fastcgi_params;
}
if (!-f \$document_root\$fastcgi_script_name) {
return 404;
}
- fastcgi_pass unix:/var/run/php7.4-fpm.sock;
+ fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index search.php;
include fastcgi.conf;
}
EOF_NGINX_CONF
#DOCS:```
-# If you have some errors, make sure that php8.1-fpm.sock is well under
+# If you have some errors, make sure that php-fpm-nominatim.sock is well under
# /var/run/ and not under /var/run/php. Otherwise change the Nginx configuration
-# to /var/run/php/php8.1-fpm.sock.
+# to /var/run/php/php-fpm-nominatim.sock.
#
# Enable the configuration and restart Nginx
#