- name: Remove existing PostgreSQL
run: |
sudo apt-get purge -yq postgresql*
- sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
+ sudo apt install curl ca-certificates gnupg
+ curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/apt.postgresql.org.gpg >/dev/null
+ sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
sudo apt-get update -qq
shell: bash
# typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager
-good-names=i,x,y,fd,db,cc
+good-names=i,x,y,m,fd,db,cc
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 4)
-set(NOMINATIM_VERSION_MINOR 1)
+set(NOMINATIM_VERSION_MINOR 2)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
install(DIRECTORY nominatim
DESTINATION ${NOMINATIM_LIBDIR}/lib-python
FILES_MATCHING PATTERN "*.py"
+ PATTERN "paths.py" EXCLUDE
PATTERN __pycache__ EXCLUDE)
+
+ configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
+ install(FILES ${PROJECT_BINARY_DIR}/paths-py.installed
+ DESTINATION ${NOMINATIM_LIBDIR}/lib-python/nominatim
+ RENAME paths.py)
+
install(DIRECTORY lib-sql DESTINATION ${NOMINATIM_LIBDIR})
install(FILES ${COUNTRY_GRID_FILE}
cd build
make test
```
+
+## Releases
+
+Nominatim follows semantic versioning. Major releases are done for large changes
+that require (or at least strongly recommend) a reimport of the databases.
+Minor releases can usually be applied to existing databases. Patch releases
+contain bug fixes only and are released from a separate branch where the
+relevant changes are cherry-picked from the master branch.
+
+Checklist for releases:
+
+* [ ] increase version in `nominatim/version.py` and CMakeLists.txt
+* [ ] update `ChangeLog` (copy information from patch releases from release branch)
+* [ ] complete `docs/admin/Migration.md`
+* [ ] update EOL dates in `SECURITY.md`
+* [ ] commit and make sure CI tests pass
+* [ ] test migration
+ * download, build and import previous version
+ * migrate using master version
+ * run updates using master version
+* [ ] prepare tarball:
+ * `git clone --recursive https://github.com/osm-search/Nominatim` (switch to right branch!)
+ * `rm -r .git* osm2pgsql/.git*`
+ * copy country data into `data/`
+ * add version to base directory and package
+* [ ] upload tarball to https://nominatim.org
+* [ ] prepare documentation
+ * check out new docs branch
+ * change git checkout instructions to tarball download instructions or adapt version on existing ones
+ * build documentation and copy to https://github.com/osm-search/nominatim-org-site
+ * add new version to history
+* [ ] check release tarball
+ * download tarball as per new documentation instructions
+ * compile and import Nominatim
+ * run `nominatim --version` to confirm correct version
+* [ ] tag new release and add a release on github.com
+4.2.0
+
+ * add experimental support for osm2pgsql flex style
+ * introduce secondary importance value to be retrieved from a raster data file
+ (currently still unused, to replace address importance, thanks to @tareqpi)
+ * add new report tool `nominatim admin --collect-os-info`
+ (thanks @micahcochran, @tareqpi)
+ * reorganise index to improve lookup performance and size
+ * run index creation after import in parallel
+ * run ANALYZE more selectively to speed up continuation of indexing
+ * fix crash on update when addr:interpolation receives an illegal value
+ * fix minimum number of retrieved results to be at least 10
+ * fix search for combinations of special term + name (e.g. Hotel Bellevue)
+ * do not return interpolations without a parent street on reverse search
+ * improve invalidation of linked places on updates
+ * fix address parsing for interpolation lines
+ * make sure socket timeouts are respected during replication
+ (working around a bug in some versions of pyosmium)
+ * update bundled osm2pgsql to 1.7.1
+ * add support for PostgreSQL 15
+ * typing fixes to work with latest type annotations from typeshed
+ * smaller improvements to documentation (thanks to @mausch)
+
4.1.0
* switch to ICU tokenizer as default
| Version | End of support for security updates |
| ------- | ----------------------------------- |
+| 4.2.x | 2024-11-24 |
| 4.1.x | 2024-08-05 |
| 4.0.x | 2023-11-02 |
| 3.7.x | 2023-04-05 |
checkout = "no"
end
+ config.vm.provider "hyperv" do |hv, override|
+ hv.memory = 2048
+ hv.linked_clone = true
+ if ENV['CHECKOUT'] != 'y' then
+ override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: "smb", smb_host: ENV['SMB_HOST'] || ENV['COMPUTERNAME']
+ end
+ end
+
config.vm.provider "virtualbox" do |vb, override|
vb.gui = false
vb.memory = 2048
end
end
- config.vm.define "ubuntu", primary: true do |sub|
+ config.vm.define "ubuntu22", primary: true do |sub|
+ sub.vm.box = "generic/ubuntu2204"
+ sub.vm.provision :shell do |s|
+ s.path = "vagrant/Install-on-Ubuntu-22.sh"
+ s.privileged = false
+ s.args = [checkout]
+ end
+ end
+
+ config.vm.define "ubuntu22-apache" do |sub|
+ sub.vm.box = "generic/ubuntu2204"
+ sub.vm.provision :shell do |s|
+ s.path = "vagrant/Install-on-Ubuntu-22.sh"
+ s.privileged = false
+ s.args = [checkout, "install-apache"]
+ end
+ end
+
+ config.vm.define "ubuntu22-nginx" do |sub|
+ sub.vm.box = "generic/ubuntu2204"
+ sub.vm.provision :shell do |s|
+ s.path = "vagrant/Install-on-Ubuntu-22.sh"
+ s.privileged = false
+ s.args = [checkout, "install-nginx"]
+ end
+ end
+
+ config.vm.define "ubuntu20" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
end
end
- config.vm.define "ubuntu-apache" do |sub|
+ config.vm.define "ubuntu20-apache" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
end
end
- config.vm.define "ubuntu-nginx" do |sub|
+ config.vm.define "ubuntu20-nginx" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
end
end
- config.vm.define "centos7" do |sub|
- sub.vm.box = "centos/7"
- sub.vm.provision :shell do |s|
- s.path = "vagrant/Install-on-Centos-7.sh"
- s.privileged = false
- s.args = [checkout]
- end
- end
-
- config.vm.define "centos" do |sub|
- sub.vm.box = "generic/centos8"
- sub.vm.provision :shell do |s|
- s.path = "vagrant/Install-on-Centos-8.sh"
- s.privileged = false
- s.args = [checkout]
- end
- end
-
-
end
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Path settings for extra data used by Nominatim (installed version).
+"""
+from pathlib import Path
+
+PHPLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-php').resolve()
+SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
+DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
+CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()
sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
-os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
-
from nominatim import cli
from nominatim import version
exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
- phplib_dir='@NOMINATIM_LIBDIR@/lib-php',
- sqllib_dir='@NOMINATIM_LIBDIR@/lib-sql',
- data_dir='@NOMINATIM_DATADIR@',
- config_dir='@NOMINATIM_CONFIGDIR@',
phpcgi_path='@PHPCGI_BIN@'))
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
-os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
-
from nominatim import cli
from nominatim import version
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
- phplib_dir='@CMAKE_SOURCE_DIR@/lib-php',
- sqllib_dir='@CMAKE_SOURCE_DIR@/lib-sql',
- data_dir='@CMAKE_SOURCE_DIR@/data',
- config_dir='@CMAKE_SOURCE_DIR@/settings',
phpcgi_path='@PHPCGI_BIN@'))
That said if you installed your own Nominatim instance you can use the
`nominatim export` PHP script as basis to return such lists.
+
+#### 7. My result has a wrong postcode. Where does it come from?
+
+Most places in OSM don't have a postcode, so Nominatim tries to interpolate
+one. It first looks at all the places that make up the address of the place.
+If one of them has a postcode defined, this is the one to be used. When
+none of the address parts has a postcode either, Nominatim interpolates one
+from the surrounding objects. If the postcode for your result is wrong, then
+most of the time there is an OSM object with the wrong postcode nearby.
+
+To find the bad postcode, go to
+[https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
+and search for your place. When you have found it, click on the 'details' link
+under the result to go to the details page. There is a field 'Computed Postcode'
+which should display the bad postcode. Click on the 'how?' link. A small
+explanation text appears. It contains a link to a query for Overpass Turbo.
+Click on that and you get a map with all places in the area that have the bad
+postcode. If none is displayed, zoom the map out a bit and then click on 'Run'.
+
+Now go to [OpenStreetMap](https://openstreetmap.org) and fix the error you
+have just found. It will take at least a day for Nominatim to catch up with
+your data fix. Sometimes longer, depending on how much editing activity is in
+the area.
+
<request time> <execution time in s> <number of results> <type> "<query string>"
Request time is the time when the request was started. The execution time is
-given in ms and corresponds to the time the query took executing in PHP.
+given in seconds and corresponds to the time the query took executing in PHP.
type contains the name of the endpoint used.
Can be used as the same time as NOMINATIM_LOG_DB.
rendering:
heading_level: 6
+##### clean-tiger-tags
+
+::: nominatim.tokenizer.sanitizers.clean_tiger_tags
+ selection:
+ members: False
+ rendering:
+ heading_level: 6
+
+
#### Token Analysis
{% endif %}
END IF;
- IF NEW.postcode is null AND NEW.rank_search > 8 THEN
+ IF NEW.postcode is null AND NEW.rank_search > 8
+ AND (NEW.rank_address > 0
+ OR ST_GeometryType(NEW.geometry) not in ('ST_LineString','ST_MultiLineString')
+ OR ST_Length(NEW.geometry) < 0.02)
+ THEN
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
END IF;
self.parser.print_help()
return 1
- for arg in ('module_dir', 'osm2pgsql_path', 'phplib_dir', 'sqllib_dir',
- 'data_dir', 'config_dir', 'phpcgi_path'):
- setattr(args, arg, Path(kwargs[arg]))
+ args.phpcgi_path = Path(kwargs['phpcgi_path'])
args.project_dir = Path(args.project_dir).resolve()
if 'cli_args' not in kwargs:
datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
- args.config = Configuration(args.project_dir, args.config_dir,
+ args.config = Configuration(args.project_dir,
environ=kwargs.get('environ', os.environ))
- args.config.set_libdirs(module=args.module_dir,
- osm2pgsql=args.osm2pgsql_path,
- php=args.phplib_dir,
- sql=args.sqllib_dir,
- data=args.data_dir)
+ args.config.set_libdirs(module=kwargs['module_dir'],
+ osm2pgsql=kwargs['osm2pgsql_path'])
log = logging.getLogger()
log.warning('Using project directory: %s', str(args.project_dir))
if args.restrict_to_osm_relation:
params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation))
- return run_legacy_script('export.php', *params, nominatim_env=args)
+ return run_legacy_script('export.php', *params, config=args.config)
class AdminServe:
params.append('--reverse-only')
if args.target == 'search':
params.append('--search-only')
- return run_legacy_script(*params, nominatim_env=args)
+ return run_legacy_script(*params, config=args.config)
# Basic environment set by root program.
config: Configuration
project_dir: Path
- module_dir: Path
- osm2pgsql_path: Path
- phplib_dir: Path
- sqllib_dir: Path
- data_dir: Path
- config_dir: Path
phpcgi_path: Path
# Global switches
from the command line arguments. The resulting dict can be
further customized and then used in `run_osm2pgsql()`.
"""
- return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path,
+ return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.config.lib_dir.osm2pgsql,
osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
osm2pgsql_style=self.config.get_import_style_file(),
osm2pgsql_style_path=self.config.config_dir,
LOG.warning('Setting up country tables')
country_info.setup_country_tables(args.config.get_libpq_dsn(),
- args.data_dir,
+ args.config.lib_dir.data,
args.no_partitions)
LOG.warning('Importing OSM data file')
from nominatim.typing import StrPath
from nominatim.errors import UsageError
+import nominatim.paths
LOG = logging.getLogger()
CONFIG_CACHE : Dict[str, Any] = {}
avoid conflicts with other environment variables.
"""
- def __init__(self, project_dir: Path, config_dir: Path,
+ def __init__(self, project_dir: Optional[Path],
environ: Optional[Mapping[str, str]] = None) -> None:
self.environ = environ or os.environ
self.project_dir = project_dir
- self.config_dir = config_dir
- self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
- if project_dir is not None and (project_dir / '.env').is_file():
- self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
+ self.config_dir = nominatim.paths.CONFIG_DIR
+ self._config = dotenv_values(str(self.config_dir / 'env.defaults'))
+ if self.project_dir is not None and (self.project_dir / '.env').is_file():
+ self.project_dir = self.project_dir.resolve()
+ self._config.update(dotenv_values(str(self.project_dir / '.env')))
class _LibDirs:
module: Path
osm2pgsql: Path
- php: Path
- sql: Path
- data: Path
+ php = nominatim.paths.PHPLIB_DIR
+ sql = nominatim.paths.SQLLIB_DIR
+ data = nominatim.paths.DATA_DIR
self.lib_dir = _LibDirs()
self._private_plugins: Dict[str, object] = {}
""" Set paths to library functions and data.
"""
for key, value in kwargs.items():
- setattr(self.lib_dir, key, Path(value).resolve())
+ setattr(self.lib_dir, key, Path(value))
def __getattr__(self, name: str) -> str:
cfgpath = Path(value)
if not cfgpath.is_absolute():
+ assert self.project_dir is not None
cfgpath = self.project_dir / cfgpath
return cfgpath.resolve()
return self.find_config_file('', 'IMPORT_STYLE')
- def get_os_env(self) -> Dict[str, Optional[str]]:
+ def get_os_env(self) -> Dict[str, str]:
""" Return a copy of the OS environment with the Nominatim configuration
merged in.
"""
- env = dict(self._config)
+ env = {k: v for k, v in self._config.items() if v is not None}
env.update(self.environ)
return env
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Path settings for extra data used by Nominatim.
+"""
+from pathlib import Path
+
+PHPLIB_DIR = (Path(__file__) / '..' / '..' / 'lib-php').resolve()
+SQLLIB_DIR = (Path(__file__) / '..' / '..' / 'lib-sql').resolve()
+DATA_DIR = (Path(__file__) / '..' / '..' / 'data').resolve()
+CONFIG_DIR = (Path(__file__) / '..' / '..' / 'settings').resolve()
module_name = config.TOKENIZER
# Create the directory for the tokenizer data
+ assert config.project_dir is not None
basedir = config.project_dir / 'tokenizer'
if not basedir.exists():
basedir.mkdir()
The function looks up the appropriate tokenizer in the database
and initialises it.
"""
+ assert config.project_dir is not None
basedir = config.project_dir / 'tokenizer'
if not basedir.is_dir():
# Directory will be repopulated by tokenizer below.
This copies all necessary data in the project directory to make
sure the tokenizer remains stable even over updates.
"""
+ assert config.project_dir is not None
module_dir = _install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
config.project_dir / 'module')
def init_from_project(self, config: Configuration) -> None:
""" Initialise the tokenizer from the project directory.
"""
+ assert config.project_dir is not None
+
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
def update_sql_functions(self, config: Configuration) -> None:
""" Reimport the SQL functions for this tokenizer.
"""
+ assert config.project_dir is not None
+
with connect(self.dsn) as conn:
max_word_freq = properties.get_property(conn, DBCFG_MAXWORDFREQ)
modulepath = config.DATABASE_MODULE_PATH or \
This is a special migration function for updating existing databases
to new software versions.
"""
+ assert config.project_dir is not None
+
self.normalization = config.TERM_NORMALIZATION
module_dir = _install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Sanitizer that preprocesses tags from the TIGER import.
+
+It makes the following changes:
+
+* remove state reference from tiger:county
+"""
+from typing import Callable
+import re
+
+from nominatim.tokenizer.sanitizers.base import ProcessInfo
+from nominatim.tokenizer.sanitizers.config import SanitizerConfig
+
+COUNTY_MATCH = re.compile('(.*), [A-Z][A-Z]')
+
+def _clean_tiger_county(obj: ProcessInfo) -> None:
+ """ Remove the state reference from tiger:county tags.
+
+ This transforms a name like 'Hamilton, AL' into 'Hamilton'.
+ If no state reference is detected at the end, the name is left as is.
+ """
+ if not obj.address:
+ return
+
+ for item in obj.address:
+ if item.kind == 'tiger' and item.suffix == 'county':
+ m = COUNTY_MATCH.fullmatch(item.name)
+ if m:
+ item.name = m[1]
+ # Switch kind and suffix, the split left them reversed.
+ item.kind = 'county'
+ item.suffix = 'tiger'
+
+ return
+
+
+def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
+ """ Create a function that cleans up tiger:county address tags.
+ """
+ return _clean_tiger_county
import urllib.request as urlrequest
from urllib.parse import urlencode
+from nominatim.config import Configuration
from nominatim.typing import StrPath
from nominatim.version import version_str
from nominatim.db.connection import get_pg_env
LOG = logging.getLogger()
def run_legacy_script(script: StrPath, *args: Union[int, str],
- nominatim_env: Any,
+ config: Configuration,
throw_on_fail: bool = False) -> int:
""" Run a Nominatim PHP script with the given arguments.
then throw a `CalledProcessError` on a non-zero exit.
"""
cmd = ['/usr/bin/env', 'php', '-Cq',
- str(nominatim_env.phplib_dir / 'admin' / script)]
+ str(config.lib_dir.php / 'admin' / script)]
cmd.extend([str(a) for a in args])
- env = nominatim_env.config.get_os_env()
- env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir)
- env['NOMINATIM_SQLDIR'] = str(nominatim_env.sqllib_dir)
- env['NOMINATIM_CONFIGDIR'] = str(nominatim_env.config_dir)
- env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(nominatim_env.module_dir)
+ env = config.get_os_env()
+ env['NOMINATIM_DATADIR'] = str(config.lib_dir.data)
+ env['NOMINATIM_SQLDIR'] = str(config.lib_dir.sql)
+ env['NOMINATIM_CONFIGDIR'] = str(config.config_dir)
+ env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(config.lib_dir.module)
if not env['NOMINATIM_OSM2PGSQL_BINARY']:
- env['NOMINATIM_OSM2PGSQL_BINARY'] = str(nominatim_env.osm2pgsql_path)
+ env['NOMINATIM_OSM2PGSQL_BINARY'] = str(config.lib_dir.osm2pgsql)
- proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env,
+ proc = subprocess.run(cmd, cwd=str(config.project_dir), env=env,
check=throw_on_fail)
return proc.returncode
class TEXT,
type TEXT,
deferred BOOLEAN)""")
+
+
+@_migration(4, 1, 99, 1)
+def split_pending_index(conn: Connection, **_: Any) -> None:
+ """ Reorganise indexes for pending updates.
+ """
+ if conn.table_exists('place'):
+ with conn.cursor() as cur:
+ cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector
+ ON placex USING BTREE (rank_address, geometry_sector)
+ WHERE indexed_status > 0""")
+ cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector
+ ON placex USING BTREE (rank_search, geometry_sector)
+ WHERE class = 'boundary' and type = 'administrative'
+ and indexed_status > 0""")
+ cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector")
LOG.info('Creating website directory.')
basedir.mkdir()
+ assert config.project_dir is not None
template = dedent(f"""\
<?php
# patch level when cherry-picking the commit with the migration.
#
# Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (4, 1, 99, 0)
+NOMINATIM_VERSION = (4, 2, 0, 0)
POSTGRESQL_REQUIRED_VERSION = (9, 6)
POSTGIS_REQUIRED_VERSION = (2, 2)
local is_interpolation = o:grab_address{match=INTERPOLATION_TAGS} > 0
- if ADD_TIGER_COUNTY then
- local v = o:grab_tag('tiger:county')
- if v ~= nil then
- v, num = v:gsub(',.*', ' county')
- if num == 0 then
- v = v .. ' county'
- end
- o:set_address('tiger:county', v)
- end
- end
o:grab_address{match=ADDRESS_TAGS}
if is_interpolation then
- ":: lower ()"
- "[^a-z0-9[:Space:]] >"
- ":: NFC ()"
+ - "[:Space:]+ > ' '"
sanitizers:
- step: clean-housenumbers
filter-kind:
- step: clean-postcodes
convert-to-address: yes
default-pattern: "[A-Z0-9- ]{3,12}"
+ - step: clean-tiger-tags
- step: split-name-list
- step: strip-brace-terms
- step: tag-analyzer-by-language
INTERPOLATION_TAGS = tag_match{keys = {'addr:interpolation'}}
-ADDRESS_TAGS = tag_match{keys = {'addr:*', 'is_in:*'}}
-ADD_TIGER_COUNTY = true
+ADDRESS_TAGS = tag_match{keys = {'addr:*', 'is_in:*', 'tiger:county'}}
SAVE_EXTRA_MAINS = true
| N10003:place | place | island |
- Scenario: Shorten tiger:county tags
- When loading osm data
- """
- n11001 Tplace=village,tiger:county=Feebourgh%2c%%20%AL
- n11002 Tplace=village,addr:state=Alabama,tiger:county=Feebourgh%2c%%20%AL
- n11003 Tplace=village,tiger:county=Feebourgh
- """
- Then place contains exactly
- | object | class | address |
- | N11001 | place | 'tiger:county': 'Feebourgh county' |
- | N11002 | place | 'tiger:county': 'Feebourgh county', 'state': 'Alabama' |
- | N11003 | place | 'tiger:county': 'Feebourgh county' |
-
-
Scenario: Building fallbacks
When loading osm data
"""
self.code_coverage_path = config['PHPCOV']
self.code_coverage_id = 1
- self.default_config = Configuration(None, self.src_dir / 'settings').get_os_env()
+ self.default_config = Configuration(None).get_os_env()
self.test_env = None
self.template_db_done = False
self.api_db_done = False
self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
- self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
if self.tokenizer is not None:
self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer
if self.import_style is not None:
def get_test_config(self):
- cfg = Configuration(Path(self.website_dir.name), self.src_dir / 'settings',
- environ=self.test_env)
+ cfg = Configuration(Path(self.website_dir.name), environ=self.test_env)
cfg.set_libdirs(module=self.build_dir / 'module',
- osm2pgsql=self.build_dir / 'osm2pgsql' / 'osm2pgsql',
- php=self.src_dir / 'lib-php',
- sql=self.src_dir / 'lib-sql',
- data=self.src_dir / 'data')
+ osm2pgsql=self.build_dir / 'osm2pgsql' / 'osm2pgsql')
return cfg
def get_libpq_dsn(self):
cli.nominatim(module_dir='',
osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
- phplib_dir=str(self.src_dir / 'lib-php'),
- sqllib_dir=str(self.src_dir / 'lib-sql'),
- data_dir=str(self.src_dir / 'data'),
- config_dir=str(self.src_dir / 'settings'),
cli_args=cmdline,
phpcgi_path='',
environ=self.test_env)
def _call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
- phplib_dir=str(src_dir / 'lib-php'),
- data_dir=str(src_dir / 'data'),
phpcgi_path='/usr/bin/php-cgi',
- sqllib_dir=str(src_dir / 'lib-sql'),
- config_dir=str(src_dir / 'settings'),
cli_args=args)
return _call_nominatim
('restrict-to-osm-way', '727'),
('restrict-to-osm-relation', '197532')
])
-def test_export_parameters(src_dir, tmp_path, param, value):
+def test_export_parameters(src_dir, tmp_path, param, value, monkeypatch):
(tmp_path / 'admin').mkdir()
(tmp_path / 'admin' / 'export.php').write_text(f"""<?php
exit(strpos(implode(' ', $_SERVER['argv']), '--{param} {value}') >= 0 ? 0 : 10);
""")
+ monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_path)
+
assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
- phplib_dir=str(tmp_path),
- data_dir=str(src_dir / 'data'),
phpcgi_path='/usr/bin/php-cgi',
- sqllib_dir=str(src_dir / 'lib-sql'),
- config_dir=str(src_dir / 'settings'),
cli_args=['export', '--' + param, value]) == 0
@pytest.mark.parametrize("endpoint", (('search', 'reverse', 'lookup', 'details', 'status')))
-def test_no_api_without_phpcgi(src_dir, endpoint):
+def test_no_api_without_phpcgi(endpoint):
assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
- phplib_dir=str(src_dir / 'lib-php'),
- data_dir=str(src_dir / 'data'),
phpcgi_path=None,
- sqllib_dir=str(src_dir / 'lib-sql'),
- config_dir=str(src_dir / 'settings'),
cli_args=[endpoint]) == 1
class TestCliApiCall:
@pytest.fixture(autouse=True)
- def setup_cli_call(self, cli_call):
- self.call_nominatim = cli_call
+ def setup_cli_call(self, params, cli_call, mock_func_factory, tmp_path):
+ self.mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
- def test_api_commands_simple(self, mock_func_factory, params, tmp_path):
+ def _run():
+ return cli_call(*params, '--project-dir', str(tmp_path))
+
+ self.run_nominatim = _run
+
+
+ def test_api_commands_simple(self, tmp_path, params):
(tmp_path / 'website').mkdir()
(tmp_path / 'website' / (params[0] + '.php')).write_text('')
- mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
- assert self.call_nominatim(*params, '--project-dir', str(tmp_path)) == 0
+ assert self.run_nominatim() == 0
- assert mock_run_api.called == 1
- assert mock_run_api.last_args[0] == params[0]
+ assert self.mock_run_api.called == 1
+ assert self.mock_run_api.last_args[0] == params[0]
- def test_bad_project_idr(self, mock_func_factory, params):
- mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
+ def test_bad_project_dir(self):
+ assert self.run_nominatim() == 1
- assert self.call_nominatim(*params) == 1
QUERY_PARAMS = {
'search': ('--query', 'somewhere'),
import nominatim.cli
import nominatim.indexer.indexer
import nominatim.tools.replication
+import nominatim.tools.refresh
from nominatim.db import status
@pytest.fixture
def test_replication_update_once_no_index(self, update_mock):
assert self.call_nominatim('--once', '--no-index') == 0
- assert str(update_mock.last_args[1]['osm2pgsql']) == 'OSM2PGSQL NOT AVAILABLE'
+ assert str(update_mock.last_args[1]['osm2pgsql']).endswith('OSM2PGSQL NOT AVAILABLE')
def test_replication_update_custom_osm2pgsql(self, monkeypatch, update_mock):
from nominatim.errors import UsageError
@pytest.fixture
-def make_config(src_dir):
+def make_config():
""" Create a configuration object from the given project directory.
"""
def _mk_config(project_dir=None):
- return Configuration(project_dir, src_dir / 'settings')
+ return Configuration(project_dir)
return _mk_config
@pytest.fixture
-def make_config_path(src_dir, tmp_path):
+def make_config_path(tmp_path):
""" Create a configuration object with project and config directories
in a temporary directory.
"""
def _mk_config():
(tmp_path / 'project').mkdir()
(tmp_path / 'config').mkdir()
- conf = Configuration(tmp_path / 'project', src_dir / 'settings')
+ conf = Configuration(tmp_path / 'project')
conf.config_dir = tmp_path / 'config'
return conf
"""
(tmp_path / 'project').mkdir()
(tmp_path / 'config').mkdir()
- conf = Configuration(tmp_path / 'project', src_dir / 'settings')
+ conf = Configuration(tmp_path / 'project')
conf.config_dir = tmp_path / 'config'
return conf
@pytest.fixture
-def def_config(src_dir):
- cfg = Configuration(None, src_dir / 'settings')
- cfg.set_libdirs(module='.', osm2pgsql='.',
- php=src_dir / 'lib-php',
- sql=src_dir / 'lib-sql',
- data=src_dir / 'data')
+def def_config():
+ cfg = Configuration(None)
+ cfg.set_libdirs(module='.', osm2pgsql='.')
return cfg
@pytest.fixture
-def project_env(src_dir, tmp_path):
+def project_env(tmp_path):
projdir = tmp_path / 'project'
projdir.mkdir()
- cfg = Configuration(projdir, src_dir / 'settings')
- cfg.set_libdirs(module='.', osm2pgsql='.',
- php=src_dir / 'lib-php',
- sql=src_dir / 'lib-sql',
- data=src_dir / 'data')
+ cfg = Configuration(projdir)
+ cfg.set_libdirs(module='.', osm2pgsql='.')
return cfg
@pytest.fixture
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions):
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
- cfg = Configuration(None, SRC_DIR.resolve() / 'settings')
- cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php',
- sql=tmp_path, data=SRC_DIR / 'data')
+ cfg = Configuration(None)
+ cfg.set_libdirs(module='.', osm2pgsql='.', sql=tmp_path)
return cfg
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for sanitizer that clean up TIGER tags.
+"""
+import pytest
+
+from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
+from nominatim.data.place_info import PlaceInfo
+
+class TestCleanTigerTags:
+
+ @pytest.fixture(autouse=True)
+ def setup_country(self, def_config):
+ self.config = def_config
+
+
+ def run_sanitizer_on(self, addr):
+ place = PlaceInfo({'address': addr})
+ _, outaddr = PlaceSanitizer([{'step': 'clean-tiger-tags'}], self.config).process_names(place)
+
+ return sorted([(p.name, p.kind, p.suffix) for p in outaddr])
+
+ @pytest.mark.parametrize('inname,outname', [('Hamilton, AL', 'Hamilton'),
+ ('Little, Borough, CA', 'Little, Borough')])
+ def test_well_formatted(self, inname, outname):
+ assert self.run_sanitizer_on({'tiger:county': inname})\
+ == [(outname, 'county', 'tiger')]
+
+
+ @pytest.mark.parametrize('name', ('Hamilton', 'Big, Road', ''))
+ def test_badly_formatted(self, name):
+ assert self.run_sanitizer_on({'tiger:county': name})\
+ == [(name, 'county', 'tiger')]
+
+
+ def test_unmatched(self):
+ assert self.run_sanitizer_on({'tiger:country': 'US'})\
+ == [('US', 'tiger', 'country')]
import pytest
+from nominatim.config import Configuration
import nominatim.tools.exec_utils as exec_utils
+import nominatim.paths
class TestRunLegacyScript:
@pytest.fixture(autouse=True)
- def setup_nominatim_env(self, tmp_path, def_config):
+ def setup_nominatim_env(self, tmp_path, monkeypatch):
tmp_phplib_dir = tmp_path / 'phplib'
tmp_phplib_dir.mkdir()
(tmp_phplib_dir / 'admin').mkdir()
- class _NominatimEnv:
- config = def_config
- phplib_dir = tmp_phplib_dir
- data_dir = Path('data')
- project_dir = Path('.')
- sqllib_dir = Path('lib-sql')
- config_dir = Path('settings')
- module_dir = 'module'
- osm2pgsql_path = 'osm2pgsql'
+ monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_phplib_dir)
- self.testenv = _NominatimEnv
+ self.phplib_dir = tmp_phplib_dir
+ self.config = Configuration(tmp_path)
+ self.config.set_libdirs(module='.', osm2pgsql='default_osm2pgsql',
+ php=tmp_phplib_dir)
def mk_script(self, code):
- codefile = self.testenv.phplib_dir / 'admin' / 't.php'
+ codefile = self.phplib_dir / 'admin' / 't.php'
codefile.write_text('<?php\n' + code + '\n')
return 't.php'
def test_run_legacy_return_exit_code(self, return_code):
fname = self.mk_script('exit({});'.format(return_code))
assert return_code == \
- exec_utils.run_legacy_script(fname, nominatim_env=self.testenv)
+ exec_utils.run_legacy_script(fname, config=self.config)
def test_run_legacy_return_throw_on_fail(self):
fname = self.mk_script('exit(11);')
with pytest.raises(subprocess.CalledProcessError):
- exec_utils.run_legacy_script(fname, nominatim_env=self.testenv,
+ exec_utils.run_legacy_script(fname, config=self.config,
throw_on_fail=True)
def test_run_legacy_return_dont_throw_on_success(self):
fname = self.mk_script('exit(0);')
- assert exec_utils.run_legacy_script(fname, nominatim_env=self.testenv,
+ assert exec_utils.run_legacy_script(fname, config=self.config,
throw_on_fail=True) == 0
def test_run_legacy_use_given_module_path(self):
fname = self.mk_script("exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == '' ? 0 : 23);")
- assert exec_utils.run_legacy_script(fname, nominatim_env=self.testenv) == 0
+ assert exec_utils.run_legacy_script(fname, config=self.config) == 0
def test_run_legacy_do_not_overwrite_module_path(self, monkeypatch):
fname = self.mk_script(
"exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == 'other' ? 0 : 1);")
- assert exec_utils.run_legacy_script(fname, nominatim_env=self.testenv) == 0
+ assert exec_utils.run_legacy_script(fname, config=self.config) == 0
def test_run_legacy_default_osm2pgsql_binary(self, monkeypatch):
- fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'osm2pgsql' ? 0 : 23);")
+ fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'default_osm2pgsql' ? 0 : 23);")
- assert exec_utils.run_legacy_script(fname, nominatim_env=self.testenv) == 0
+ assert exec_utils.run_legacy_script(fname, config=self.config) == 0
def test_run_legacy_override_osm2pgsql_binary(self, monkeypatch):
fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'somethingelse' ? 0 : 23);")
- assert exec_utils.run_legacy_script(fname, nominatim_env=self.testenv) == 0
+ assert exec_utils.run_legacy_script(fname, config=self.config) == 0
class TestRunApiScript: