git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 11 Jul 2022 13:38:26 +0000 (15:38 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 11 Jul 2022 13:38:26 +0000 (15:38 +0200)
42 files changed:
.github/actions/build-nominatim/action.yml
.github/workflows/ci-tests.yml
docs/CMakeLists.txt
docs/mkdocs.yml
lib-php/ParameterParser.php
lib-php/PlaceLookup.php
lib-php/SimpleWordList.php
lib-php/template/address-geocodejson.php
lib-php/template/address-geojson.php
lib-php/template/address-json.php
lib-php/template/details-json.php
lib-php/template/search-batch-json.php
lib-php/template/search-geocodejson.php
lib-php/template/search-geojson.php
lib-php/template/search-json.php
nominatim/clicmd/setup.py
nominatim/data/country_info.py [moved from nominatim/tools/country_info.py with 100% similarity]
nominatim/data/place_info.py [moved from nominatim/indexer/place_info.py with 87% similarity]
nominatim/data/postcode_format.py
nominatim/indexer/runners.py
nominatim/tokenizer/base.py
nominatim/tokenizer/icu_rule_loader.py
nominatim/tokenizer/icu_tokenizer.py
nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py
nominatim/tools/tiger_data.py
test/php/Nominatim/ParameterParserTest.php
test/php/Nominatim/SimpleWordListTest.php
test/python/cli/test_cmd_import.py
test/python/data/test_country_info.py [moved from test/python/tools/test_country_info.py with 99% similarity]
test/python/db/test_utils.py
test/python/dummy_tokenizer.py
test/python/tokenizer/sanitizers/test_clean_housenumbers.py
test/python/tokenizer/sanitizers/test_clean_postcodes.py
test/python/tokenizer/sanitizers/test_split_name_list.py
test/python/tokenizer/sanitizers/test_strip_brace_terms.py
test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py
test/python/tokenizer/test_icu.py
test/python/tokenizer/test_legacy.py
test/python/tokenizer/test_place_sanitizer.py
test/python/tools/test_exec_utils.py
test/python/tools/test_postcodes.py
vagrant/Install-on-Ubuntu-22.sh [new file with mode: 0755]

index 042166ad7fc357a8b4b129961290e2e3ae0cf147..c6ff7a31ca599caff9e35b51081e88ebc0c1fd28 100644 (file)
@@ -14,6 +14,11 @@ runs:
     using: "composite"
 
     steps:
+        - name: Clean out the disk
+          run: |
+            sudo rm -rf /opt/hostedtoolcache/go /opt/hostedtoolcache/CodeQL /usr/lib/jvm /usr/local/share/chromium /usr/local/lib/android
+            df -h
+          shell: bash
         - name: Install prerequisites
           run: |
             sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev
index 4ce14f9365eb96ef04b184e00eb93962c4674fc9..53b76a035d7688eb39dc43116ee2f138cf7a4b85 100644 (file)
@@ -37,7 +37,7 @@ jobs:
         needs: create-archive
         strategy:
             matrix:
-                ubuntu: [18, 20]
+                ubuntu: [18, 20, 22]
                 include:
                     - ubuntu: 18
                       postgresql: 9.6
@@ -49,6 +49,11 @@ jobs:
                       postgis: 3
                       pytest: py.test-3
                       php: 7.4
+                    - ubuntu: 22
+                      postgresql: 14
+                      postgis: 3
+                      pytest: py.test-3
+                      php: 8.1
 
         runs-on: ubuntu-${{ matrix.ubuntu }}.04
 
@@ -65,6 +70,7 @@ jobs:
               with:
                   php-version: ${{ matrix.php }}
                   tools: phpunit, phpcs, composer
+                  ini-values: opcache.jit=disable
 
             - uses: actions/setup-python@v2
               with:
@@ -85,8 +91,12 @@ jobs:
               if: matrix.ubuntu == 20
 
             - name: Install test prerequsites
-              run: pip3 install pytest behave==1.2.6
-              if: matrix.ubuntu == 18
+              run: pip3 install pylint pytest behave==1.2.6
+              if: ${{ (matrix.ubuntu == 18) || (matrix.ubuntu == 22) }}
+
+            - name: Install test prerequsites
+              run: sudo apt-get install -y -qq python3-pytest
+              if: matrix.ubuntu == 22
 
             - name: Install latest pylint
               run: pip3 install pylint
@@ -102,7 +112,7 @@ jobs:
             - name: PHP unit tests
               run: phpunit ./
               working-directory: Nominatim/test/php
-              if: matrix.ubuntu == 20
+              if: ${{ (matrix.ubuntu == 20) || (matrix.ubuntu == 22) }}
 
             - name: Python unit tests
               run: $PYTEST test/python
@@ -161,7 +171,7 @@ jobs:
 
         strategy:
             matrix:
-                name: [Ubuntu-18, Ubuntu-20]
+                name: [Ubuntu-18, Ubuntu-20, Ubuntu-22]
                 include:
                     - name: Ubuntu-18
                       flavour: ubuntu
@@ -173,6 +183,11 @@ jobs:
                       image: "ubuntu:20.04"
                       ubuntu: 20
                       install_mode: install-apache
+                    - name: Ubuntu-22
+                      flavour: ubuntu
+                      image: "ubuntu:22.04"
+                      ubuntu: 22
+                      install_mode: install-apache
 
         container:
             image: ${{ matrix.image }}
index 3b9a316b191f937c8254d5cbd53c9f204078836a..0ccc5974d23adcf484201f6e0e036dbc9e91a338 100644 (file)
@@ -26,6 +26,7 @@ ADD_CUSTOM_TARGET(doc
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-8.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-8.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
+   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
    COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
 )
 
index a156fb7115417d60647e7d01a787cb06dda85811..48fe1d0d165f99b6b8a7b704ad620e7a4367d062 100644 (file)
@@ -46,6 +46,7 @@ nav:
         - 'Installation on CentOS 8' : 'appendix/Install-on-Centos-8.md'
         - 'Installation on Ubuntu 18' : 'appendix/Install-on-Ubuntu-18.md'
         - 'Installation on Ubuntu 20' : 'appendix/Install-on-Ubuntu-20.md'
+        - 'Installation on Ubuntu 22' : 'appendix/Install-on-Ubuntu-22.md'
 markdown_extensions:
     - codehilite
     - admonition
index 98b95388ce678f7d52a2711dc18b29829414801d..070be36c24a3856448ef3de965298690001dac33 100644 (file)
@@ -70,7 +70,7 @@ class ParameterParser
             return $sDefault;
         }
 
-        if (!in_array($this->aParams[$sName], $aValues)) {
+        if (!in_array($this->aParams[$sName], $aValues, true)) {
             userError("Parameter '$sName' must be one of: ".join(', ', $aValues));
         }
 
index 715f1ced3daab48dbd95b374465f44ff6e93d050..33156537459b87019ba3fe269d1d07a3f58d8032 100644 (file)
@@ -445,7 +445,7 @@ class PlaceLookup
 
             if ($this->bExtraTags) {
                 if ($aPlace['extra']) {
-                    $aPlace['sExtraTags'] = json_decode($aPlace['extra']);
+                    $aPlace['sExtraTags'] = json_decode($aPlace['extra'], true);
                 } else {
                     $aPlace['sExtraTags'] = (object) array();
                 }
@@ -482,7 +482,7 @@ class PlaceLookup
             return (object) array();
         }
 
-        $aFullNames = json_decode($sNames);
+        $aFullNames = json_decode($sNames, true);
         $aNames = array();
 
         foreach ($aFullNames as $sKey => $sValue) {
index ecd02153da7439b947e0899e5a176b17be033273..7009d370f9bb0e992f49eb47547ce5ef950b0f5b 100644 (file)
@@ -120,13 +120,18 @@ class SimpleWordList
         return array_slice($aWordSets, 0, SimpleWordList::MAX_WORDSETS);
     }
 
+    /**
+     * Custom comparison routine which takes two arrays. The array with the
+     * fewest items wins. If both have the same number of items, then the one
+     * with the longest first element wins.
+     */
     public static function cmpByArraylen($aA, $aB)
     {
         $iALen = count($aA);
         $iBLen = count($aB);
 
         if ($iALen == $iBLen) {
-            return 0;
+            return strlen($aB[0]) <=> strlen($aA[0]);
         }
 
         return ($iALen < $iBLen) ? -1 : 1;
index 8a0a6289a9697a9a3a5c84ecb95c3224875ad02e..d54aef4005b26f843d890641f68e74aac5774bd7 100644 (file)
@@ -56,7 +56,7 @@ if (empty($aPlace)) {
     }
 
     if (isset($aPlace['asgeojson'])) {
-        $aFilteredPlaces['geometry'] = json_decode($aPlace['asgeojson']);
+        $aFilteredPlaces['geometry'] = json_decode($aPlace['asgeojson'], true);
     } else {
         $aFilteredPlaces['geometry'] = array(
                                         'type' => 'Point',
index 206b959f2261a1b329ef3279a01afb205f6a0111..dc3c3832af7ce09fc32be88df4d5ab9d02256d4b 100644 (file)
@@ -65,7 +65,7 @@ if (empty($aPlace)) {
     }
 
     if (isset($aPlace['asgeojson'])) {
-        $aFilteredPlaces['geometry'] = json_decode($aPlace['asgeojson']);
+        $aFilteredPlaces['geometry'] = json_decode($aPlace['asgeojson'], true);
     } else {
         $aFilteredPlaces['geometry'] = array(
                                         'type' => 'Point',
index 1a429acb3a2d44e730b300604c43b7f614879da9..0766eaf4a992a643501dfa7dde2f3ea5ca1bacb0 100644 (file)
@@ -63,7 +63,7 @@ if (empty($aPlace)) {
     }
 
     if (isset($aPlace['asgeojson'])) {
-        $aFilteredPlaces['geojson'] = json_decode($aPlace['asgeojson']);
+        $aFilteredPlaces['geojson'] = json_decode($aPlace['asgeojson'], true);
     }
 
     if (isset($aPlace['assvg'])) {
index 68a0ce25b82d285a5cd75aeef043a831b335d82a..ae80a85b262a205b108c5d58526a920213200b61 100644 (file)
@@ -48,7 +48,7 @@ $aPlaceDetails['centroid'] = array(
                               'coordinates' => array( (float) $aPointDetails['lon'], (float) $aPointDetails['lat'] )
                              );
 
-$aPlaceDetails['geometry'] = json_decode($aPointDetails['asgeojson']);
+$aPlaceDetails['geometry'] = json_decode($aPointDetails['asgeojson'], true);
 
 $funcMapAddressLine = function ($aFull) {
     return array(
index cbf25c874f1e4b5bcb2f3cc3680c375844bb8dd8..430237a294a5d33782323fab3d789f5932730436 100644 (file)
@@ -60,7 +60,7 @@ foreach ($aBatchResults as $aSearchResults) {
         }
 
         if (isset($aPointDetails['asgeojson'])) {
-            $aPlace['geojson'] = json_decode($aPointDetails['asgeojson']);
+            $aPlace['geojson'] = json_decode($aPointDetails['asgeojson'], true);
         }
 
         if (isset($aPointDetails['assvg'])) {
index 5439e3cfe532e61140a3293701d8a1a5c6517926..bba41a0d2210532c30afe3f7c0e34ddde0b4df68 100644 (file)
@@ -46,7 +46,7 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
     }
 
     if (isset($aPointDetails['asgeojson'])) {
-        $aPlace['geometry'] = json_decode($aPointDetails['asgeojson']);
+        $aPlace['geometry'] = json_decode($aPointDetails['asgeojson'], true);
     } else {
         $aPlace['geometry'] = array(
                                'type' => 'Point',
index f517142f861b9848dd9797761b4a8a388e1b1e1c..7665700dff47b08995f517432a74de506521fdfc 100644 (file)
@@ -54,7 +54,7 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
     }
 
     if (isset($aPointDetails['asgeojson'])) {
-        $aPlace['geometry'] = json_decode($aPointDetails['asgeojson']);
+        $aPlace['geometry'] = json_decode($aPointDetails['asgeojson'], true);
     } else {
         $aPlace['geometry'] = array(
                                'type' => 'Point',
index 3f9e0a571d9928bc1a0fbdc298c90a1134537fc3..5fb1302080ac4ced5cebb5523a1257755b04ccbe 100644 (file)
@@ -53,7 +53,7 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
     }
 
     if (isset($aPointDetails['asgeojson'])) {
-        $aPlace['geojson'] = json_decode($aPointDetails['asgeojson']);
+        $aPlace['geojson'] = json_decode($aPointDetails['asgeojson'], true);
     }
 
     if (isset($aPointDetails['assvg'])) {
index f0ec358bc50d4d4484c23338d323350c387360ac..73095468ab2fe30b4e50b9d2b889346030a1a92c 100644 (file)
@@ -63,8 +63,9 @@ class SetupAll:
 
 
     @staticmethod
-    def run(args):
-        from ..tools import database_import, refresh, postcodes, freeze, country_info
+    def run(args): # pylint: disable=too-many-statements
+        from ..data import country_info
+        from ..tools import database_import, refresh, postcodes, freeze
         from ..indexer.indexer import Indexer
 
         country_info.setup_country_config(args.config)
similarity index 87%
rename from nominatim/indexer/place_info.py
rename to nominatim/data/place_info.py
index 87ecb731b9ee8e989c5069ecd9ae3c54161676f0..d2ba3979260fcfa2fd411771b844c3d67807c440 100644 (file)
@@ -9,8 +9,6 @@ Wrapper around place information the indexer gets from the database and hands to
 the tokenizer.
 """
 
-import psycopg2.extras
-
 class PlaceInfo:
     """ Data class containing all information the tokenizer gets about a
         place it should process the names for.
@@ -20,13 +18,6 @@ class PlaceInfo:
         self._info = info
 
 
-    def analyze(self, analyzer):
-        """ Process this place with the given tokenizer and return the
-            result in psycopg2-compatible Json.
-        """
-        return psycopg2.extras.Json(analyzer.process_place(self))
-
-
     @property
     def name(self):
         """ A dictionary with the names of the place or None if the place
index 6ae43b7d50bc3f2eab0040401a32f43001d0bbb0..366ea5057b322b7ddf77ee7d5937069e7f7b9846 100644 (file)
@@ -11,7 +11,7 @@ format.
 import re
 
 from nominatim.errors import UsageError
-from nominatim.tools import country_info
+from nominatim.data import country_info
 
 class CountryPostcodeMatcher:
     """ Matches and formats a postcode according to a format definition
index 9a30ffe6b10c0c441efe6e1c61e892d996dfccaf..c8495ee4df115ef96cf7971cff7c7eb1318948a0 100644 (file)
@@ -11,14 +11,17 @@ tasks.
 import functools
 
 from psycopg2 import sql as pysql
+import psycopg2.extras
 
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 
 # pylint: disable=C0111
 
 def _mk_valuelist(template, num):
     return pysql.SQL(',').join([pysql.SQL(template)] * num)
 
+def _analyze_place(place, analyzer):
+    return psycopg2.extras.Json(analyzer.process_place(PlaceInfo(place)))
 
 class AbstractPlacexRunner:
     """ Returns SQL commands for indexing of the placex table.
@@ -56,7 +59,7 @@ class AbstractPlacexRunner:
         for place in places:
             for field in ('place_id', 'name', 'address', 'linked_place_id'):
                 values.append(place[field])
-            values.append(PlaceInfo(place).analyze(self.analyzer))
+            values.append(_analyze_place(place, self.analyzer))
 
         worker.perform(self._index_sql(len(places)), values)
 
@@ -150,7 +153,7 @@ class InterpolationRunner:
         values = []
         for place in places:
             values.extend((place[x] for x in ('place_id', 'address')))
-            values.append(PlaceInfo(place).analyze(self.analyzer))
+            values.append(_analyze_place(place, self.analyzer))
 
         worker.perform(self._index_sql(len(places)), values)
 
index f81b3bc262ed9f690b0eb6bc794e2cab076de822..70a54bfdc28141e62e0e5e63e5e52342044b5a14 100644 (file)
@@ -12,7 +12,7 @@ from abc import ABC, abstractmethod
 from typing import List, Tuple, Dict, Any
 
 from nominatim.config import Configuration
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 
 # pylint: disable=unnecessary-pass
 
index 8a564355e35c03e4ed715363d505224efac1a6cd..035b6698511035fbaf6dfed1f137aeb8e911cce1 100644 (file)
@@ -17,7 +17,7 @@ from nominatim.db.properties import set_property, get_property
 from nominatim.errors import UsageError
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
 from nominatim.tokenizer.icu_token_analysis import ICUTokenAnalysis
-import nominatim.tools.country_info
+import nominatim.data.country_info
 
 LOG = logging.getLogger()
 
@@ -46,7 +46,7 @@ class ICURuleLoader:
                                               config='TOKENIZER_CONFIG')
 
         # Make sure country information is available to analyzers and sanitizers.
-        nominatim.tools.country_info.setup_country_config(config)
+        nominatim.data.country_info.setup_country_config(config)
 
         self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
         self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
index a6ff08a40774736c363965bada9a23101f2ccd86..171d43926cc42be9695ef11522acd7ecf1ed07c8 100644 (file)
@@ -16,7 +16,7 @@ from textwrap import dedent
 from nominatim.db.connection import connect
 from nominatim.db.utils import CopyBuffer
 from nominatim.db.sql_preprocessor import SQLPreprocessor
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
 from nominatim.tokenizer.base import AbstractAnalyzer, AbstractTokenizer
 
index 9a99d127728290264c7762f7c76fefb7177f3267..d3413c1ac79d39b5be68a8092ded62b43659e589 100644 (file)
@@ -30,7 +30,7 @@ Arguments:
           any analyzer tagged) is retained. (default: replace)
 
 """
-from nominatim.tools import country_info
+from nominatim.data import country_info
 
 class _AnalyzerByLanguage:
     """ Processor for tagging the language of names in a place.
index 6e37df5e9df7d2b6b3228c4ba79b8a5350865386..e78dcd8f3ade0695eec2f1842f21f40f3eca80ad 100644 (file)
@@ -13,11 +13,13 @@ import logging
 import os
 import tarfile
 
+from psycopg2.extras import Json
+
 from nominatim.db.connection import connect
 from nominatim.db.async_connection import WorkerPool
 from nominatim.db.sql_preprocessor import SQLPreprocessor
 from nominatim.errors import UsageError
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 
 LOG = logging.getLogger()
 
@@ -87,7 +89,7 @@ def handle_threaded_sql_statements(pool, fd, analyzer):
             address = dict(street=row['street'], postcode=row['postcode'])
             args = ('SRID=4326;' + row['geometry'],
                     int(row['from']), int(row['to']), row['interpolation'],
-                    PlaceInfo({'address': address}).analyze(analyzer),
+                    Json(analyzer.process_place(PlaceInfo({'address': address}))),
                     analyzer.normalize_postcode(row['postcode']))
         except ValueError:
             continue
index 1488c987a22218f82b97471ec54f57c40c9404dc..7381bdf84a9cca05f13a0aff3ce6f0804076d62b 100644 (file)
@@ -137,6 +137,9 @@ class ParameterParserTest extends \PHPUnit\Framework\TestCase
 
     public function testGetSet()
     {
+        $this->expectException(\Exception::class);
+        $this->expectExceptionMessage("Parameter 'val3' must be one of: foo, bar");
+
         $oParams = new ParameterParser(array(
                                         'val1' => 'foo',
                                         'val2' => '',
@@ -148,7 +151,7 @@ class ParameterParserTest extends \PHPUnit\Framework\TestCase
         $this->assertSame('foo', $oParams->getSet('val1', array('foo', 'bar')));
 
         $this->assertSame(false, $oParams->getSet('val2', array('foo', 'bar')));
-        $this->assertSame(0, $oParams->getSet('val3', array('foo', 'bar')));
+        $oParams->getSet('val3', array('foo', 'bar'));
     }
 
 
index fe543b3faaaaabc09d757e3de698f9f8445523a5..69cb51809178352d99bbe4df94599d281934f8ed 100644 (file)
@@ -77,7 +77,7 @@ class SimpleWordListTest extends \PHPUnit\Framework\TestCase
 
         $oList = new SimpleWordList('a b c');
         $this->assertEquals(
-            '(a b c),(a|b c),(a b|c),(a|b|c)',
+            '(a b c),(a b|c),(a|b c),(a|b|c)',
             $this->serializeSets($oList->getWordSets(new TokensFullSet()))
         );
 
@@ -88,6 +88,22 @@ class SimpleWordListTest extends \PHPUnit\Framework\TestCase
         );
     }
 
+    public function testCmpByArraylen()
+    {
+        // Array elements are phrases, we want to sort so longest phrases are first
+        $aList1 = array('hackney', 'bridge', 'london', 'england');
+        $aList2 = array('hackney', 'london', 'bridge');
+        $aList3 = array('bridge', 'hackney', 'london', 'england');
+
+        $this->assertEquals(0, \Nominatim\SimpleWordList::cmpByArraylen($aList1, $aList1));
+
+        // list2 "wins". Fewer array elements
+        $this->assertEquals(1, \Nominatim\SimpleWordList::cmpByArraylen($aList1, $aList2));
+        $this->assertEquals(-1, \Nominatim\SimpleWordList::cmpByArraylen($aList2, $aList3));
+
+        // list1 "wins". Same number of array elements but longer first element
+        $this->assertEquals(-1, \Nominatim\SimpleWordList::cmpByArraylen($aList1, $aList3));
+    }
 
     public function testMaxWordSets()
     {
index 84c7743ab8f9f6d69675eea460e109db39e1a2c5..d545c76051bd478d414f549d673ce4d179823d04 100644 (file)
@@ -10,7 +10,7 @@ Tests for import command of the command-line interface wrapper.
 import pytest
 
 import nominatim.tools.database_import
-import nominatim.tools.country_info
+import nominatim.data.country_info
 import nominatim.tools.refresh
 import nominatim.tools.postcodes
 import nominatim.indexer.indexer
@@ -37,7 +37,7 @@ class TestCliImportWithDb:
     def test_import_full(self, mock_func_factory, with_updates, place_table, property_table):
         mocks = [
             mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
-            mock_func_factory(nominatim.tools.country_info, 'setup_country_tables'),
+            mock_func_factory(nominatim.data.country_info, 'setup_country_tables'),
             mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
             mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
             mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
@@ -46,7 +46,7 @@ class TestCliImportWithDb:
             mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'),
             mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
             mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
-            mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
+            mock_func_factory(nominatim.data.country_info, 'create_country_names'),
             mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_config'),
             mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
             mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
@@ -76,7 +76,7 @@ class TestCliImportWithDb:
             mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
             mock_func_factory(nominatim.tools.database_import, 'load_data'),
             mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
-            mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
+            mock_func_factory(nominatim.data.country_info, 'create_country_names'),
             mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
             mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
             mock_func_factory(nominatim.tools.refresh, 'setup_website'),
@@ -94,7 +94,7 @@ class TestCliImportWithDb:
                                       temp_db_conn):
         mocks = [
             mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
-            mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
+            mock_func_factory(nominatim.data.country_info, 'create_country_names'),
             mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
             mock_func_factory(nominatim.tools.refresh, 'setup_website'),
             mock_func_factory(nominatim.db.properties, 'set_property')
@@ -115,7 +115,7 @@ class TestCliImportWithDb:
     def test_import_continue_postprocess(self, mock_func_factory):
         mocks = [
             mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
-            mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
+            mock_func_factory(nominatim.data.country_info, 'create_country_names'),
             mock_func_factory(nominatim.tools.refresh, 'setup_website'),
             mock_func_factory(nominatim.db.properties, 'set_property')
         ]
similarity index 99%
rename from test/python/tools/test_country_info.py
rename to test/python/data/test_country_info.py
index 3f00d54e1706d5003623164bb01e2e3065e4881c..2234f40d112627a8af00d575a77587c26487341b 100644 (file)
@@ -10,7 +10,7 @@ Tests for function that handle country properties.
 from textwrap import dedent
 import pytest
 
-from nominatim.tools import country_info
+from nominatim.data import country_info
 
 @pytest.fixture
 def loaded_country(def_config):
index 3991dff366d8f6f2eac46e8d80adea60130e62dc..e0be962f09e8fe991840d6533b87c3ad05ff9585 100644 (file)
@@ -65,7 +65,7 @@ class TestCopyBuffer:
 
     @pytest.fixture(autouse=True)
     def setup_test_table(self, table_factory):
-        table_factory(self.TABLE_NAME, 'colA INT, colB TEXT')
+        table_factory(self.TABLE_NAME, 'col_a INT, col_b TEXT')
 
 
     def table_rows(self, cursor):
@@ -92,7 +92,7 @@ class TestCopyBuffer:
             buf.add('foo')
 
             buf.copy_out(temp_db_cursor, self.TABLE_NAME,
-                         columns=['colB'])
+                         columns=['col_b'])
 
         assert self.table_rows(temp_db_cursor) == {(None, 'foo')}
 
@@ -103,7 +103,7 @@ class TestCopyBuffer:
             buf.add(' two ', 2)
 
             buf.copy_out(temp_db_cursor, self.TABLE_NAME,
-                         columns=['colB', 'colA'])
+                         columns=['col_b', 'col_a'])
 
         assert self.table_rows(temp_db_cursor) == {(1, 'one'), (2, ' two ')}
 
@@ -115,7 +115,7 @@ class TestCopyBuffer:
             buf.add('\\N')
 
             buf.copy_out(temp_db_cursor, self.TABLE_NAME,
-                         columns=['colB'])
+                         columns=['col_b'])
 
         assert self.table_rows(temp_db_cursor) == {(None, 'foo\tbar'),
                                                    (None, 'sun\nson'),
@@ -128,7 +128,7 @@ class TestCopyBufferJson:
 
     @pytest.fixture(autouse=True)
     def setup_test_table(self, table_factory):
-        table_factory(self.TABLE_NAME, 'colA INT, colB JSONB')
+        table_factory(self.TABLE_NAME, 'col_a INT, col_b JSONB')
 
 
     def table_rows(self, cursor):
index 57bf2f2f44de8750189dd802a76c8cbfdb9b5f19..ed32c650dab3d873ae8dbddbf5fead64ea1e94ac 100644 (file)
@@ -7,7 +7,7 @@
 """
 Tokenizer for testing.
 """
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 from nominatim.config import Configuration
 
 def create(dsn, data_dir):
index 34cc7413b49c32300d34e112028a52e873b5810b..128e1201ed1c4b3cdfa9714375e632ba75e0c374 100644 (file)
@@ -10,7 +10,7 @@ Tests for the sanitizer that normalizes housenumbers.
 import pytest
 
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 
 @pytest.fixture
 def sanitize(request):
index 443761962566cc694b4bf7c2765598f72ead5e09..237527f1e44c7064cf8be6e2c3025353a1774ebd 100644 (file)
@@ -10,8 +10,8 @@ Tests for the sanitizer that normalizes postcodes.
 import pytest
 
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
-from nominatim.indexer.place_info import PlaceInfo
-from nominatim.tools import country_info
+from nominatim.data.place_info import PlaceInfo
+from nominatim.data import country_info
 
 @pytest.fixture
 def sanitize(def_config, request):
index 47bd1e44cb0806ac6c331bced65e7219d90660d5..67157fba2148a9e806dce756c68e797e82719cdb 100644 (file)
@@ -10,7 +10,7 @@ Tests for the sanitizer that splits multivalue lists.
 import pytest
 
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 
 from nominatim.errors import UsageError
 
index c37562dd3a2517b6d8f0367ea08845ca13ef4f44..eb5543646595a96aa7183e7c813f54ef61813b6e 100644 (file)
@@ -10,7 +10,7 @@ Tests for the sanitizer that handles braced suffixes.
 import pytest
 
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 
 def run_sanitizer_on(**kwargs):
     place = PlaceInfo({'name': kwargs})
index dfd2511343bf5a1936b867c7bf201e24f5bd196f..306b80273f7876b1babc0f4efef022fa8e258aaa 100644 (file)
@@ -9,9 +9,9 @@ Tests for the sanitizer that enables language-dependent analyzers.
 """
 import pytest
 
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
-from nominatim.tools.country_info import setup_country_config
+from nominatim.data.country_info import setup_country_config
 
 class TestWithDefaults:
 
index b9de97bcc2b11f7ff94ef4d6dd00ce6b6c54b7a7..7f0ffce16b5a3aa8c90e228aa9abd9f57cd6097a 100644 (file)
@@ -17,7 +17,7 @@ from nominatim.tokenizer import icu_tokenizer
 import nominatim.tokenizer.icu_rule_loader
 from nominatim.db import properties
 from nominatim.db.sql_preprocessor import SQLPreprocessor
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 
 from mock_icu_word_table import MockIcuWordTable
 
index 8f79e2422b74c46f7634462e797092dd40a1bfac..57a82b8a0f4aa5af2f1dc6c2377db0fca8d83959 100644 (file)
@@ -12,7 +12,7 @@ import re
 
 import pytest
 
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 from nominatim.tokenizer import legacy_tokenizer
 from nominatim.db import properties
 from nominatim.errors import UsageError
index 2dd4e58a7ef731090cbd96a5f14bc3613581b91f..31401bd19aa0eb73c161823023e537aef31d1726 100644 (file)
@@ -11,7 +11,7 @@ import pytest
 
 from nominatim.errors import UsageError
 import nominatim.tokenizer.place_sanitizer as sanitizer
-from nominatim.indexer.place_info import PlaceInfo
+from nominatim.data.place_info import PlaceInfo
 
 
 def test_placeinfo_clone_new_name():
index 78650180f96ff0c2a668e4dd9cd641581f3526e1..26ea92b2c787fb72b6c4a39e64e9646723907cd9 100644 (file)
@@ -122,7 +122,13 @@ class TestRunApiScript:
 
     @staticmethod
     def test_fail_on_error_output(tmp_path):
-        (tmp_path / 'website' / 'bad.php').write_text("<?php\nfwrite(STDERR, 'WARNING'.PHP_EOL);")
+        # Starting with PHP 8, the PHP CLI no longer has STDERR defined as a constant
+        php = """
+        <?php
+        if(!defined('STDERR')) define('STDERR', fopen('php://stderr', 'wb'));
+        fwrite(STDERR, 'WARNING'.PHP_EOL);
+        """
+        (tmp_path / 'website' / 'bad.php').write_text(php)
 
         assert exec_utils.run_api_script('bad', tmp_path) == 1
 
index 0c4b93fcac2e8e85cc7065d07505cf87f5616402..f5e8f3de0c83c329fce8e2d83a4ecbea0a84ed33 100644 (file)
@@ -11,7 +11,8 @@ import subprocess
 
 import pytest
 
-from nominatim.tools import postcodes, country_info
+from nominatim.tools import postcodes
+from nominatim.data import country_info
 import dummy_tokenizer
 
 class MockPostcodeTable:
diff --git a/vagrant/Install-on-Ubuntu-22.sh b/vagrant/Install-on-Ubuntu-22.sh
new file mode 100755 (executable)
index 0000000..dbb70ff
--- /dev/null
@@ -0,0 +1,271 @@
+#!/bin/bash -e
+#
+# hacks for broken vagrant box      #DOCS:
+sudo rm -f /var/lib/dpkg/lock       #DOCS:
+export APT_LISTCHANGES_FRONTEND=none #DOCS:
+export DEBIAN_FRONTEND=noninteractive #DOCS:
+
+# *Note:* these installation instructions are also available in executable
+#         form for use with vagrant under vagrant/Install-on-Ubuntu-22.sh.
+#
+# Installing the Required Software
+# ================================
+#
+# These instructions expect that you have a freshly installed Ubuntu 22.04.
+#
+# Make sure all packages are up-to-date by running:
+#
+
+    sudo apt update -qq
+
+# Now you can install all packages needed for Nominatim:
+
+    sudo apt install -y php-cgi
+    sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
+                        libboost-filesystem-dev libexpat1-dev zlib1g-dev \
+                        libbz2-dev libpq-dev libproj-dev \
+                        postgresql-server-dev-14 postgresql-14-postgis-3 \
+                        postgresql-contrib-14 postgresql-14-postgis-3-scripts \
+                        php php-pgsql php-intl libicu-dev python3-dotenv \
+                        python3-psycopg2 python3-psutil python3-jinja2 \
+                        python3-icu python3-datrie git
+
+#
+# System Configuration
+# ====================
+#
+# The following steps are meant to configure a fresh Ubuntu installation
+# for use with Nominatim. You may skip some of the steps if you have your
+# OS already configured.
+#
+# Creating Dedicated User Accounts
+# --------------------------------
+#
+# Nominatim will run as a global service on your machine. It is therefore
+# best to install it under its own separate user account. In the following
+# we assume this user is called nominatim and the installation will be in
+# /srv/nominatim. To create the user and directory run:
+#
+#     sudo useradd -d /srv/nominatim -s /bin/bash -m nominatim
+#
+# You may find a more suitable location if you wish.
+#
+# To be able to copy and paste instructions from this manual, export
+# user name and home directory now like this:
+#
+if [ "x$USERNAME" == "x" ]; then #DOCS:
+    export USERNAME=vagrant        #DOCS:    export USERNAME=nominatim
+    export USERHOME=/home/vagrant  #DOCS:    export USERHOME=/srv/nominatim
+fi                                 #DOCS:
+#
+# **Never, ever run the installation as a root user.** You have been warned.
+#
+# Make sure that system servers can read from the home directory:
+
+    chmod a+x $USERHOME
+
+# Setting up PostgreSQL
+# ---------------------
+#
+# Tune the postgresql configuration, which is located in
+# `/etc/postgresql/14/main/postgresql.conf`. See section *PostgreSQL Tuning* in
+# [the installation page](../admin/Installation.md#postgresql-tuning)
+# for the parameters to change.
+#
+# Restart the postgresql service after updating this config file.
+
+if [ "x$NOSYSTEMD" == "xyes" ]; then  #DOCS:
+    sudo pg_ctlcluster 14 main start  #DOCS:
+else                                  #DOCS:
+    sudo systemctl restart postgresql
+fi                                    #DOCS:
+#
+# Finally, we need to add two postgres users: one for the user that does
+# the import and another for the webserver which should access the database
+# for reading only:
+#
+
+    sudo -u postgres createuser -s $USERNAME
+    sudo -u postgres createuser www-data
+
+#
+# Installing Nominatim
+# ====================
+#
+# Building and Configuration
+# --------------------------
+#
+# Get the source code from Github and change into the source directory
+#
+if [ "x$1" == "xyes" ]; then  #DOCS:    :::sh
+    cd $USERHOME
+    git clone --recursive https://github.com/openstreetmap/Nominatim.git
+    cd Nominatim
+else                               #DOCS:
+    cd $USERHOME/Nominatim         #DOCS:
+fi                                 #DOCS:
+
+# When installing the latest source from github, you also need to
+# download the country grid:
+
+if [ ! -f data/country_osm_grid.sql.gz ]; then       #DOCS:    :::sh
+    wget -O data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
+fi                                 #DOCS:
+
+# The code must be built in a separate directory. Create this directory,
+# then configure and build Nominatim in there:
+
+    mkdir $USERHOME/build
+    cd $USERHOME/build
+    cmake $USERHOME/Nominatim
+    make
+    sudo make install
+
+# Nominatim is now ready to use. You can continue with
+# [importing a database from OSM data](../admin/Import.md). If you want to set up
+# a webserver first, continue reading.
+#
+# Setting up a webserver
+# ======================
+#
+# The webserver should serve the php scripts from the website directory of your
+# [project directory](../admin/Import.md#creating-the-project-directory).
+# This directory needs to exist when being configured.
+# Therefore set up a project directory and create a website directory:
+
+    mkdir $USERHOME/nominatim-project
+    mkdir $USERHOME/nominatim-project/website
+
+# The import process will populate the directory later.
+
+#
+# Option 1: Using Apache
+# ----------------------
+#
+if [ "x$2" == "xinstall-apache" ]; then #DOCS:
+#
+# Apache has a PHP module that can be used to serve Nominatim. To install them
+# run:
+
+    sudo apt install -y apache2 libapache2-mod-php
+
+# You need to create an alias to the website directory in your apache
+# configuration. Add a separate nominatim configuration to your webserver:
+
+#DOCS:```sh
+sudo tee /etc/apache2/conf-available/nominatim.conf << EOFAPACHECONF
+<Directory "$USERHOME/nominatim-project/website">
+  Options FollowSymLinks MultiViews
+  AddType text/html   .php
+  DirectoryIndex search.php
+  Require all granted
+</Directory>
+
+Alias /nominatim $USERHOME/nominatim-project/website
+EOFAPACHECONF
+#DOCS:```
+
+#
+# Then enable the configuration and restart apache
+#
+
+    sudo a2enconf nominatim
+if [ "x$NOSYSTEMD" == "xyes" ]; then  #DOCS:
+    sudo apache2ctl start             #DOCS:
+else                                  #DOCS:
+    sudo systemctl restart apache2
+fi                                    #DOCS:
+
+# The Nominatim API is now available at `http://localhost/nominatim/`.
+
+fi   #DOCS:
+
+#
+# Option 2: Using nginx
+# ---------------------
+#
+if [ "x$2" == "xinstall-nginx" ]; then #DOCS:
+
+# Nginx has no native support for php scripts. You need to set up php-fpm for
+# this purpose. First install nginx and php-fpm:
+
+    sudo apt install -y nginx php-fpm
+
+# You need to configure php-fpm to listen on a Unix socket.
+
+#DOCS:```sh
+sudo tee /etc/php/8.1/fpm/pool.d/www.conf << EOF_PHP_FPM_CONF
+[www]
+; Replace the tcp listener and add the unix socket
+listen = /var/run/php8.1-fpm.sock
+
+; Ensure that the daemon runs as the correct user
+listen.owner = www-data
+listen.group = www-data
+listen.mode = 0666
+
+; Unix user of FPM processes
+user = www-data
+group = www-data
+
+; Choose process manager type (static, dynamic, ondemand)
+pm = ondemand
+pm.max_children = 5
+EOF_PHP_FPM_CONF
+#DOCS:```
+
+# Then create an Nginx configuration to forward PHP requests to that socket.
+
+#DOCS:```sh
+sudo tee /etc/nginx/sites-available/default << EOF_NGINX_CONF
+server {
+    listen 80 default_server;
+    listen [::]:80 default_server;
+
+    root $USERHOME/nominatim-project/website;
+    index search.php index.html;
+    location / {
+        try_files \$uri \$uri/ @php;
+    }
+
+    location @php {
+        fastcgi_param SCRIPT_FILENAME "\$document_root\$uri.php";
+        fastcgi_param PATH_TRANSLATED "\$document_root\$uri.php";
+        fastcgi_param QUERY_STRING    \$args;
+        fastcgi_pass unix:/var/run/php8.1-fpm.sock;
+        fastcgi_index index.php;
+        include fastcgi_params;
+    }
+
+    location ~ [^/]\.php(/|$) {
+        fastcgi_split_path_info ^(.+?\.php)(/.*)$;
+        if (!-f \$document_root\$fastcgi_script_name) {
+            return 404;
+        }
+        fastcgi_pass unix:/var/run/php8.1-fpm.sock;
+        fastcgi_index search.php;
+        include fastcgi.conf;
+    }
+}
+EOF_NGINX_CONF
+#DOCS:```
+
+# If you encounter errors, check that php8.1-fpm.sock is located directly under
+# /var/run/ and not under /var/run/php. Otherwise change the Nginx configuration
+# to /var/run/php/php8.1-fpm.sock.
+#
+# Enable the configuration and restart Nginx
+#
+
+if [ "x$NOSYSTEMD" == "xyes" ]; then  #DOCS:
+    sudo /usr/sbin/php-fpm8.1 --nodaemonize --fpm-config /etc/php/8.1/fpm/php-fpm.conf & #DOCS:
+    sudo /usr/sbin/nginx &            #DOCS:
+else                                  #DOCS:
+    sudo systemctl restart php8.1-fpm nginx
+fi                                    #DOCS:
+
+# The Nominatim API is now available at `http://localhost/`.
+
+
+
+fi   #DOCS: