description: 'Version of Ubuntu to install on'
required: false
default: '20'
+ cmake-args:
+ description: 'Additional options to hand to cmake'
+ required: false
+ default: ''
runs:
using: "composite"
shell: bash
env:
UBUNTUVER: ${{ inputs.ubuntu }}
-
- - name: Download dependencies
- run: |
- if [ ! -f country_grid.sql.gz ]; then
- wget --no-verbose https://www.nominatim.org/data/country_grid.sql.gz
- fi
- cp country_grid.sql.gz Nominatim/data/country_osm_grid.sql.gz
- shell: bash
+ CMAKE_ARGS: ${{ inputs.cmake-args }}
- name: Configure
- run: mkdir build && cd build && cmake ../Nominatim
+ run: mkdir build && cd build && cmake $CMAKE_ARGS ../Nominatim
shell: bash
+ env:
+ CMAKE_ARGS: ${{ inputs.cmake-args }}
- name: Build
run: |
- name: Install PostgreSQL
run: |
- sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER} postgresql-server-dev-${PGVER}
+ sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER}
shell: bash
env:
PGVER: ${{ inputs.postgresql-version }}
working-directory: Nominatim/test/bdd
- icu-test:
+ legacy-test:
needs: create-archive
- strategy:
- matrix:
- ubuntu: [20]
- include:
- - ubuntu: 20
- postgresql: 13
- postgis: 3
- pytest: py.test-3
- php: 7.4
-
- runs-on: ubuntu-${{ matrix.ubuntu }}.04
+ runs-on: ubuntu-20.04
steps:
- uses: actions/download-artifact@v2
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
- php-version: ${{ matrix.php }}
- coverage: xdebug
- tools: phpunit, phpcs, composer
-
- - uses: actions/setup-python@v2
- with:
- python-version: 3.6
- if: matrix.ubuntu == 18
+ php-version: 7.4
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
- postgresql-version: ${{ matrix.postgresql }}
- postgis-version: ${{ matrix.postgis }}
+ postgresql-version: 13
+ postgis-version: 3
+
+ - name: Install PostgreSQL server dev
+ run: sudo apt-get install -y postgresql-server-dev-13
- uses: ./Nominatim/.github/actions/build-nominatim
with:
- ubuntu: ${{ matrix.ubuntu }}
+ ubuntu: 20
+ cmake-args: -DBUILD_MODULE=on
- name: Install test prerequisites
run: sudo apt-get install -y -qq python3-behave
- if: matrix.ubuntu == 20
-
- - name: Install test prerequsites
- run: pip3 install behave==1.2.6
- if: matrix.ubuntu == 18
- - name: BDD tests (icu tokenizer)
+ - name: BDD tests (legacy tokenizer)
run: |
- behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=icu --format=progress3
+ behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy --format=progress3
working-directory: Nominatim/test/bdd
set(BUILD_IMPORTER on CACHE BOOL "Build everything for importing/updating the database")
set(BUILD_API on CACHE BOOL "Build everything for the API server")
-set(BUILD_MODULE on CACHE BOOL "Build PostgreSQL module")
+set(BUILD_MODULE off CACHE BOOL "Build PostgreSQL module for legacy tokenizer")
set(BUILD_TESTS on CACHE BOOL "Build test suite")
set(BUILD_DOCS on CACHE BOOL "Build documentation")
set(BUILD_MANPAGE on CACHE BOOL "Build Manual Page")
sudo make install
```
+!!! warning
+ The default installation no longer compiles the PostgreSQL module that
+ is needed for the legacy tokenizer from older Nominatim versions. If you
+ are upgrading an older database or want to run the
+ [legacy tokenizer](../customize/Tokenizers.md#legacy-tokenizer) for
+ some other reason, you need to enable the PostgreSQL module via
+ cmake: `cmake -DBUILD_MODULE=on ../Nominatim`. To compile the module
+ you need to have the server development headers for PostgreSQL installed.
+ On Ubuntu/Debian run: `sudo apt install postgresql-server-dev-<postgresql version>`
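+ For example, for PostgreSQL 13 (adjust the version to the one you run):
+
+     sudo apt install postgresql-server-dev-13
+     cmake -DBUILD_MODULE=on ../Nominatim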
+
+
Nominatim installs itself into `/usr/local` by default. To choose a different
installation directory add `-DCMAKE_INSTALL_PREFIX=<install root>` to the
cmake command. Make sure that the `bin` directory is available in your path
## 4.0.0 -> master
+### ICU tokenizer is the new default
+
+Nominatim now installs the [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer)
+by default. This only affects newly imported databases. An existing
+database keeps the tokenizer it was set up with. If you still
+run with the legacy tokenizer, make sure to compile Nominatim with the
+PostgreSQL module, see [Installation](Installation.md#building-nominatim).
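+If you are not sure which tokenizer an existing database uses, you can look
+it up in the database properties, for example with psql (replace `nominatim`
+with the name of your database):
+
+```
+psql -d nominatim -c "SELECT value FROM nominatim_properties WHERE property = 'tokenizer'"
+```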
+
### geocodejson output changed
The `type` field of the geocodejson output has changed. It now contains
The legacy tokenizer implements the analysis algorithms of older Nominatim
versions. It uses a special PostgreSQL module to normalize names and queries.
-This tokenizer is currently the default.
+This tokenizer is automatically installed and used when upgrading an older
+database. It should not be used for new installations anymore.
+
+### Compiling the PostgreSQL module
+
+The tokenizer needs a special C module for PostgreSQL which is not compiled
+by default. If you need the legacy tokenizer, compile Nominatim as follows:
+
+```
+mkdir build
+cd build
+cmake -DBUILD_MODULE=on ../Nominatim
+make
+```
+
+### Enabling the tokenizer
To enable the tokenizer add the following line to your project configuration:
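```
NOMINATIM_TOKENIZER=legacy
```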
The ICU tokenizer uses the [ICU library](http://site.icu-project.org/) to
normalize names and queries. It also offers configurable decomposition and
abbreviation handling.
+This tokenizer is currently the default.
To enable the tokenizer add the following line to your project configuration:
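```
NOMINATIM_TOKENIZER=icu
```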
if configfile.suffix in ('.yaml', '.yml'):
result = self._load_from_yaml(configfile)
elif configfile.suffix == '.json':
- with configfile.open('r') as cfg:
+ with configfile.open('r', encoding='utf-8') as cfg:
result = json.load(cfg)
else:
raise UsageError(f"Config file '{configfile}' has unknown format.")
@define('CONST_Max_Word_Frequency', 10000000);
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
- require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
+ require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
def _save_config(self):
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
- """.format(config)))
+ """.format(config)), encoding='utf-8')
def _init_db_tables(self, config):
# Tokenizer used for normalizing and parsing queries and names.
# The tokenizer is set up during import and cannot be changed afterwards
# without a reimport.
-# Currently available tokenizers: legacy
-NOMINATIM_TOKENIZER="legacy"
+# Currently available tokenizers: icu, legacy
+NOMINATIM_TOKENIZER="icu"
# Number of occurrences of a word before it is considered frequent.
# Similar to the concept of stop words. Frequent partial words get ignored
def before_tag(context, tag):
if tag == 'fail-legacy':
- if context.config.userdata['TOKENIZER'] in (None, 'legacy'):
+ if context.config.userdata['TOKENIZER'] == 'legacy':
context.scenario.skip("Not implemented in legacy tokenizer")
self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
self.run_nominatim('freeze')
- if self.tokenizer != 'icu':
+ if self.tokenizer == 'legacy':
phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
else:
plist.sort()
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
- if nctx.tokenizer == 'icu':
+ if nctx.tokenizer != 'legacy':
cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
(plist,))
else:
country_code VARCHAR(2)""")
-@pytest.fixture
-def word_table(temp_db_conn):
- return mocks.MockWordTable(temp_db_conn)
-
-
@pytest.fixture
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions):
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
from nominatim.db import properties
# This must always point to the mock word table for the default tokenizer.
-from mock_legacy_word_table import MockLegacyWordTable as MockWordTable
+from mock_icu_word_table import MockIcuWordTable as MockWordTable
class MockPlacexTable:
""" A placex table for testing.
@pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, place_row, placex_table, osmline_table,
- word_table, temp_db_cursor, threads):
+ temp_db_cursor, threads):
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute(f"""CREATE FUNCTION {func} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
from nominatim.errors import UsageError
import nominatim.version
+from mock_legacy_word_table import MockLegacyWordTable
+
class DummyTokenizer:
def update_sql_functions(self, config):
monkeypatch.setattr(migration.tokenizer_factory, 'get_tokenizer_for_db',
lambda *args: DummyTokenizer())
+@pytest.fixture
+def legacy_word_table(temp_db_conn):
+ return MockLegacyWordTable(temp_db_conn)
+
def test_no_migration_old_versions(temp_db_with_extensions, table_factory, def_config):
table_factory('country_name', 'name HSTORE, country_code TEXT')
def test_change_housenumber_transliteration(temp_db_conn, temp_db_cursor,
- word_table, placex_table):
+ legacy_word_table, placex_table):
placex_table.add(housenumber='3A')
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
return dummy_tokenizer.DummyTokenizer(None, None)
@pytest.fixture
-def postcode_table(temp_db_conn, placex_table, word_table):
+def postcode_table(temp_db_conn, placex_table):
return MockPostcodeTable(temp_db_conn)
sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
libboost-filesystem-dev libexpat1-dev zlib1g-dev\
libbz2-dev libpq-dev libproj-dev \
- postgresql-server-dev-10 postgresql-10-postgis-2.4 \
+ postgresql-10-postgis-2.4 \
postgresql-contrib-10 postgresql-10-postgis-scripts \
php php-pgsql php-intl libicu-dev python3-pip \
- python3-psutil python3-jinja2 python3-icu git
+ python3-psutil python3-jinja2 python3-yaml python3-icu git
# Some of the Python packages that come with Ubuntu 18.04 are too old, so
# install the latest version from pip:
sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
libboost-filesystem-dev libexpat1-dev zlib1g-dev \
libbz2-dev libpq-dev libproj-dev \
- postgresql-server-dev-12 postgresql-12-postgis-3 \
+ postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 \
- python3-icu python3-datrie git
+ python3-icu python3-datrie python3-yaml git
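+
+# If you plan to run the legacy tokenizer, for example because you are
+# migrating a database from an older Nominatim version, you also need the
+# PostgreSQL server development headers to build the required module
+# (the package version must match your PostgreSQL version):
+#
+#    sudo apt install -y postgresql-server-dev-12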
#
# System Configuration