steps:
- name: Remove existing PostgreSQL
run: |
- sudo apt-get update -qq
sudo apt-get purge -yq postgresql*
+ sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
+ sudo apt-get update -qq
+
shell: bash
- name: Install PostgreSQL
uses: shivammathur/setup-php@v2
with:
php-version: '7.4'
- tools: phpunit, phpcs
+ tools: phpunit, phpcs, composer
- name: Get Date
id: get-date
- uses: ./Nominatim/.github/actions/build-nominatim
- name: Install test prerequisites
- run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave
+ run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage php-xdebug
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
- name: PHP unit tests
- run: phpunit ./
+ run: phpunit --coverage-clover ../../coverage-php.xml ./
working-directory: Nominatim/test/php
- name: Python unit tests
- run: py.test-3 test/python
+ run: py.test-3 --cov=nominatim --cov-report=xml test/python
working-directory: Nominatim
- name: BDD tests
- run: behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
+ run: |
+ behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 -DPHPCOV=./cov
+ composer require phpunit/phpcov:7.0.2
+ vendor/bin/phpcov merge --clover ../../coverage-bdd.xml ./cov
working-directory: Nominatim/test/bdd
+ - name: Upload coverage to Codecov
+ uses: codecov/codecov-action@v1
+ with:
+ files: ./Nominatim/coverage*.xml
+ directory: ./
+ name: codecov-umbrella
+ fail_ci_if_error: true
+ path_to_write_report: ./coverage/codecov_report.txt
+ verbose: true
+
import:
runs-on: ubuntu-20.04
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 3)
-set(NOMINATIM_VERSION_MINOR 6)
+set(NOMINATIM_VERSION_MINOR 7)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
+3.7.0
+
+ * switch to dotenv for configuration file
+ * introduce 'make install' (reorganising most of the code)
+ * introduce nominatim tool as replacement for various php scripts
+ * introduce project directories and allow multiple installations from same build
+ * clean up BDD tests: drop nose, reorganise step code
+ * simplify test database for API BDD tests and autoinstall database
+ * port most of the code for command-line tools to Python
+ (thanks to @darkshredder and @AntoJvlt)
+ * add tests for all tooling
+ * replace pyosmium-get-changes with custom internal implementation using
+ pyosmium
+ * improve search for queries with housenumber and partial terms
+ * add database versioning
+ * use jinja2 for preprocessing SQL files
+ * introduce automatic migrations
+ * reverse fix preference of interpolations over housenumbers
+ * parallelize indexing of postcodes
+ * add non-key indexes to speed up housenumber + street searches
+ * switch housenumber field in placex to save transliterated names
+
+
3.6.0
* add full support for searching by and displaying of addr:* tags
[![Build Status](https://github.com/osm-search/Nominatim/workflows/CI%20Tests/badge.svg)](https://github.com/osm-search/Nominatim/actions?query=workflow%3A%22CI+Tests%22)
+[![codecov](https://codecov.io/gh/osm-search/Nominatim/branch/master/graph/badge.svg?token=8P1LXrhCMy)](https://codecov.io/gh/osm-search/Nominatim)
Nominatim
=========
--- /dev/null
+codecov:
+ require_ci_to_pass: yes
+
+coverage:
+ status:
+ project: off
+ patch: off
+
+comment:
+ require_changes: true
+ after_n_builds: 2
+
+fixes:
+ - "Nominatim/::"
\ No newline at end of file
### Configuration setup in `.env`
-The Nominatim server can be customized via a `.env` in the project directory.
-This is a file in [dotenv](https://github.com/theskumar/python-dotenv) format
-which looks the same as variable settings in a standard shell environment.
+The Nominatim server can be customized via an `.env` configuration file in the
+project directory. This is a file in [dotenv](https://github.com/theskumar/python-dotenv)
+format which looks the same as variable settings in a standard shell environment.
You can also set the same configuration via environment variables. All
settings have a `NOMINATIM_` prefix to avoid conflicts with other environment
variables.
TIGER data to your own Nominatim instance by following these steps. The
entire US adds about 10GB to your database.
- 1. Get preprocessed TIGER 2020 data and unpack it into your project
- directory:
+ 1. Get preprocessed TIGER 2020 data:
cd $PROJECT_DIR
wget https://nominatim.org/data/tiger2020-nominatim-preprocessed.tar.gz
- tar xf tiger2020-nominatim-preprocessed.tar.gz
2. Import the data into your Nominatim database:
- nominatim add-data --tiger-data tiger
+ nominatim add-data --tiger-data tiger2020-nominatim-preprocessed.tar.gz
3. Enable use of the Tiger data in your `.env` by adding:
the following steps:
* stop any updates that are potentially running
-* update Nominatim to the nwer version
-* goto your project directory and run `nominatim admin --migrate`
+* update Nominatim to the newer version
+* go to your project directory and run `nominatim admin --migrate`
* (optionally) restart updates
Below you find additional migrations and hints about other structural and
-breaking changes.
+breaking changes. **Please read them before running the migration.**
!!! note
If you are migrating from a version <3.6, then you still have to follow
the manual migration steps up to 3.6.
-## 3.6.0 -> master
+## 3.6.0 -> 3.7.0
+
+### New format and name of configuration file
+
+The configuration for an import is now saved in a `.env` file in the project
+directory. This file follows the dotenv format. For more information, see
+the [installation chapter](Import.md#configuration-setup-in-env).
+
+To migrate to the new system, create a new project directory, add the `.env`
+file and port your custom configuration from `settings/local.php`. Most
+settings are named similarly and have only received a `NOMINATIM_` prefix.
+Use the default settings in `settings/env.defaults` as a reference.
### New location for data files
`./utils/query.php` no longer exists in its old form. `nominatim search`
provides a replacement but returns different output.
+### Switch to normalized house numbers
+
+The housenumber column in the placex table now uses a normalized version.
+The automatic migration step will convert the column but this may take a
+very long time. It is advisable to take the machine offline while doing that.
+
## 3.5.0 -> 3.6.0
### Change of layout of search_name_* tables
protected $aLangPrefOrder = array();
protected $aExcludePlaceIDs = array();
- protected $bReverseInPlan = true;
protected $iLimit = 20;
protected $iFinalLimit = 10;
return $this->oNormalizer->transliterate($sTerm);
}
- public function setReverseInPlan($bReverse)
- {
- $this->bReverseInPlan = $bReverse;
- }
-
public function setLanguagePreference($aLangPref)
{
$this->aLangPrefOrder = $aLangPref;
$oParams->getString('country'),
$oParams->getString('postalcode')
);
- $this->setReverseInPlan(false);
} else {
$this->setQuery($sQuery);
}
return false;
}
- public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured)
+ public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens)
{
/*
Calculate all searches using oValidTokens i.e.
*/
foreach ($aPhrases as $iPhrase => $oPhrase) {
$aNewPhraseSearches = array();
- $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
+ $sPhraseType = $oPhrase->getPhraseType();
foreach ($oPhrase->getWordSets() as $aWordset) {
$aWordsetSearches = $aSearches;
$aNewSearches = $oCurrentSearch->extendWithPartialTerm(
$sToken,
$oSearchTerm,
- $bIsStructured,
+ (bool) $sPhraseType,
$iPhrase,
$oValidTokens->get(' '.$sToken)
);
// Commas are used to reduce the search space by indicating where phrases split
if ($this->aStructuredQuery) {
$aInPhrases = $this->aStructuredQuery;
- $bStructuredPhrases = true;
} else {
$aInPhrases = explode(',', $sQuery);
- $bStructuredPhrases = false;
}
Debug::printDebugArray('Search context', $oCtx);
Debug::newSection('Search candidates');
- $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
+ $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
- if ($this->bReverseInPlan) {
+ if (!$this->aStructuredQuery) {
// Reverse phrase array and also reverse the order of the wordsets in
// the first and final phrase. Don't bother about phrases in the middle
// because order in the address doesn't matter.
if (count($aPhrases) > 1) {
$aPhrases[count($aPhrases)-1]->invertWordSets();
}
- $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false);
+ $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
'Structured query' => $this->aStructuredQuery,
'Name keys' => Debug::fmtArrayVals($this->aLangPrefOrder),
'Excluded place IDs' => Debug::fmtArrayVals($this->aExcludePlaceIDs),
- 'Try reversed query'=> $this->bReverseInPlan,
'Limit (for searches)' => $this->iLimit,
'Limit (for results)'=> $this->iFinalLimit,
'Country codes' => Debug::fmtArrayVals($this->aCountryCodes),
$oGeocode = new Nominatim\Geocode($oDB);
$oGeocode->setLanguagePreference($oParams->getPreferredLanguages(false));
-$oGeocode->setReverseInPlan(true);
$oGeocode->loadParamArray($oParams);
if ($oParams->getBool('search')) {
END IF;
IF bnd.name ? 'name' THEN
- bnd_name := make_standard_name(bnd.name->'name');
+ bnd_name := lower(bnd.name->'name');
IF bnd_name = '' THEN
bnd_name := NULL;
END IF;
IF bnd.extratags ? 'place' and bnd_name is not null THEN
FOR linked_placex IN
SELECT * FROM placex
- WHERE make_standard_name(name->'name') = bnd_name
+ WHERE (position(lower(name->'name') in bnd_name) > 0
+ OR position(bnd_name in lower(name->'name')) > 0)
AND placex.class = 'place' AND placex.type = bnd.extratags->'place'
AND placex.osm_type = 'N'
AND placex.linked_place_id is null
AND placex.rank_search < 26 -- needed to select the right index
- AND _st_covers(bnd.geometry, placex.geometry)
+ AND placex.type != 'postcode'
+ AND ST_Covers(bnd.geometry, placex.geometry)
LOOP
{% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %}
RETURN linked_placex;
AND placex.linked_place_id is null
AND placex.rank_search < 26
AND _st_covers(bnd.geometry, placex.geometry)
- ORDER BY make_standard_name(name->'name') = bnd_name desc
+ ORDER BY lower(name->'name') = bnd_name desc
LOOP
{% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
RETURN linked_placex;
{% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %}
FOR linked_placex IN
SELECT placex.* from placex
- WHERE make_standard_name(name->'name') = bnd_name
+ WHERE lower(name->'name') = bnd_name
AND ((bnd.rank_address > 0
and bnd.rank_address = (compute_place_rank(placex.country_code,
'N', placex.class,
false, placex.postcode)).address_rank)
OR (bnd.rank_address = 0 and placex.rank_search = bnd.rank_search))
AND placex.osm_type = 'N'
+ AND placex.class = 'place'
AND placex.linked_place_id is null
AND placex.rank_search < 26 -- needed to select the right index
- AND _st_covers(bnd.geometry, placex.geometry)
+ AND placex.type != 'postcode'
+ AND ST_Covers(bnd.geometry, placex.geometry)
LOOP
{% if debug %}RAISE WARNING 'Found matching place node %', linked_placex.osm_id;{% endif %}
RETURN linked_placex;
AND rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_placeNode
- ON placex USING gist (geometry) {{db.tablespace.search_index}}
- WHERE osm_type = 'N' AND rank_search between 5 and 25
- AND class = 'place' AND type != 'postcode'
- AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-
CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_place_id
ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
-CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name')) {{db.tablespace.address_index}} WHERE osm_type='N' and rank_search < 26;
+CREATE INDEX idx_placex_geometry_placenode ON placex
+ USING GIST (geometry) {{db.tablespace.search_index}}
+ WHERE osm_type = 'N' and rank_search < 26
+ and class = 'place' and type != 'postcode' and linked_place_id is null;
CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26;
DROP SEQUENCE IF EXISTS seq_place;
'idx_placex_rank_address',
'idx_placex_parent_place_id',
'idx_placex_geometry_reverse_lookuppolygon',
- 'idx_placex_geometry_reverse_placenode',
+ 'idx_placex_geometry_placenode',
+ 'idx_placex_housenumber',
'idx_osmline_parent_place_id',
'idx_osmline_parent_osm_id',
'idx_postcode_id',
cur.execute("""UPDATE placex
SET housenumber = create_housenumber_id(housenumber)
WHERE housenumber is not null""")
+
+
+@_migration(3, 7, 0, 0)
+def switch_placenode_geometry_index(conn, **_):
+ """ Replace idx_placex_geometry_reverse_placeNode index.
+
+ Make the index slightly more permissive, so that it can also be used
+ when matching up boundaries and place nodes. It makes the
+ idx_placex_adminname index unnecessary.
+ """
+ with conn.cursor() as cur:
+ cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex
+ USING GIST (geometry)
+ WHERE osm_type = 'N' and rank_search < 26
+ and class = 'place' and type != 'postcode'
+ and linked_place_id is null""")
+ cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
self.sanity_check_pattern = re.compile(r'^\w+$')
self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
self.config.TERM_NORMALIZATION)
+ #This set will contain all existing phrases from the word table which
+ #no longer exist on the wiki.
+ #It contains tuples with the following format: (normalized_word, class, type, operator)
+ self.words_phrases_to_delete = set()
+ #This set will contain the phrases which still exist from the wiki.
+ #It is used to prevent duplicates on the wiki by removing them from
+ #the word_phrases_to_delete only at the end.
+ self.words_phrases_still_exist = set()
+ #This set will contain all existing place_classtype tables which don't match any
+ #special phrases class/type on the wiki.
+ self.table_phrases_to_delete = set()
def import_from_wiki(self, languages=None):
"""
if languages is not None and not isinstance(languages, list):
raise TypeError('The \'languages\' argument should be of type list.')
+ self._fetch_existing_words_phrases()
+ self._fetch_existing_place_classtype_tables()
+
#Get all languages to process.
languages = self._load_languages() if not languages else languages
class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
self._create_place_classtype_table_and_indexes(class_type_pairs)
+ self._remove_non_existent_phrases_from_db()
self.db_connection.commit()
LOG.warning('Import done.')
+ def _fetch_existing_words_phrases(self):
+ """
+ Fetch existing special phrases from the word table.
+ Fill the word_phrases_to_delete set of the class.
+ """
+ #Only extract special phrases terms:
+ #If class=place and type=house then it is a housenumber term.
+ #If class=place and type=postcode then it is a postcode term.
+ word_query = """
+ SELECT word, class, type, operator FROM word
+ WHERE class != 'place' OR (type != 'house' AND type != 'postcode')
+ """
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL(word_query))
+ for row in db_cursor:
+ row[3] = '-' if row[3] is None else row[3]
+ self.words_phrases_to_delete.add(
+ (row[0], row[1], row[2], row[3])
+ )
+
+ def _fetch_existing_place_classtype_tables(self):
+ """
+ Fetch existing place_classtype tables.
+ Fill the table_phrases_to_delete set of the class.
+ """
+ query = """
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema='public'
+ AND table_name like 'place_classtype_%';
+ """
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL(query))
+ for row in db_cursor:
+ self.table_phrases_to_delete.add(row[0])
+
def _load_white_and_black_lists(self):
"""
Load white and black lists from phrases-settings.json.
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
- return self.config.LANGUAGES or default_languages
+ return self.config.LANGUAGES.split(',') if self.config.LANGUAGES else default_languages
@staticmethod
def _get_wiki_content(lang):
phrase_class = match[1].strip()
phrase_type = match[2].strip()
phrase_operator = match[3].strip()
+ #Needed if some operator in the wiki are not written in english
+ phrase_operator = '-' if phrase_operator not in ('near', 'in') else phrase_operator
#hack around a bug where building=yes was imported with quotes into the wiki
phrase_type = re.sub(r'\"|"', '', phrase_type)
- #sanity check, in case somebody added garbage in the wiki
- self._check_sanity(lang, phrase_class, phrase_type)
-
#blacklisting: disallow certain class/type combinations
if (
phrase_class in self.black_list.keys() and
):
continue
- #add class/type to the pairs dict
+ #Check if the phrase already exists in the database.
+ if (
+ (normalized_label, phrase_class, phrase_type, phrase_operator)
+ in self.words_phrases_to_delete
+ ):
+ #Remove this phrase from the ones to delete as it still exists on the wiki.
+ self.words_phrases_still_exist.add(
+ (normalized_label, phrase_class, phrase_type, phrase_operator)
+ )
+ class_type_pairs.add((phrase_class, phrase_type))
+ #Don't need to add this phrase as it already exists in the word table.
+ continue
+
+ #sanity check, in case somebody added garbage in the wiki
+ self._check_sanity(lang, phrase_class, phrase_type)
+
class_type_pairs.add((phrase_class, phrase_type))
self._process_amenity(
phrase_class = pair[0]
phrase_type = pair[1]
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+
+ if table_name in self.table_phrases_to_delete:
+ #Remove this table from the ones to delete as it matches a class/type
+ #still existing on the special phrases of the wiki.
+ self.table_phrases_to_delete.remove(table_name)
+ #So we don't need to create the table and indexes.
+ continue
+
#Table creation
self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
.format(Identifier(table_name),
Identifier(self.config.DATABASE_WEBUSER)))
+ def _remove_non_existent_phrases_from_db(self):
+ """
+ Remove special phrases which don't exist on the wiki anymore.
+ Delete from the word table and delete the place_classtype tables.
+ """
+ LOG.warning('Cleaning database...')
+ self.words_phrases_to_delete = self.words_phrases_to_delete - self.words_phrases_still_exist
+ #Array containing all queries to execute. Contain tuples of format (query, parameters)
+ queries_parameters = []
+
+ #Delete phrases from the word table which are not on the wiki anymore.
+ for phrase_to_delete in self.words_phrases_to_delete:
+ if phrase_to_delete[3] == '-':
+ query = """
+ DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
+ """
+ parameters = (phrase_to_delete[0], phrase_to_delete[1], phrase_to_delete[2], )
+ queries_parameters.append((query, parameters))
+ else:
+ query = """
+ DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator = %s
+ """
+ parameters = (phrase_to_delete[0], phrase_to_delete[1],
+ phrase_to_delete[2], phrase_to_delete[3], )
+ queries_parameters.append((query, parameters))
+
+ #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
+ for table in self.table_phrases_to_delete:
+ query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
+ queries_parameters.append((query, ()))
+
+ with self.db_connection.cursor() as db_cursor:
+ for query, parameters in queries_parameters:
+ db_cursor.execute(query, parameters)
+
def _convert_php_settings_if_needed(self, file_path):
"""
Convert php settings file of special phrases to json file if it is still in php format.
except Exception as exc: # pylint: disable=broad-except
LOG.info('Wrong SQL statement: %s', exc)
+def handle_unregister_connection_pool(sel, place_threads):
+ """ Handles unregistering pool of connections
+ """
+
+ while place_threads > 0:
+ for key, _ in sel.select(1):
+ conn = key.data
+ sel.unregister(conn)
+ try:
+ conn.wait()
+ except Exception as exc: # pylint: disable=broad-except
+ LOG.info('Wrong SQL statement: %s', exc)
+ conn.close()
+ place_threads -= 1
def add_tiger_data(dsn, data_dir, threads, config, sqllib_dir):
""" Import tiger data from directory or tar file
handle_threaded_sql_statements(sel, file)
# Unregistering pool of database connections
- while place_threads > 0:
- for key, _ in sel.select(1):
- conn = key.data
- sel.unregister(conn)
- conn.wait()
- conn.close()
- place_threads -= 1
+ handle_unregister_connection_pool(sel, place_threads)
if tar:
tar.close()
# and must always be increased when there is a change to the database or code
# that requires a migration.
# Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (3, 6, 0, 1)
+NOMINATIM_VERSION = (3, 7, 0, 1)
POSTGRESQL_REQUIRED_VERSION = (9, 3)
POSTGIS_REQUIRED_VERSION = (2, 2)
Tests for import special phrases methods
of the class SpecialPhrasesImporter.
"""
+from mocks import MockParamCapture
from nominatim.errors import UsageError
from pathlib import Path
import tempfile
TEST_BASE_DIR = Path(__file__) / '..' / '..'
+def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table,
+ temp_db_conn):
+ """
+ Check for the fetch_existing_words_phrases() method.
+ It should return the special phrase term added to the word
+ table.
+ """
+ with temp_db_conn.cursor() as temp_db_cursor:
+ query ="""
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
+ 'class', 'type', null, 0, 'near');
+ """
+ temp_db_cursor.execute(query)
+
+ assert not special_phrases_importer.words_phrases_to_delete
+ special_phrases_importer._fetch_existing_words_phrases()
+ contained_phrase = special_phrases_importer.words_phrases_to_delete.pop()
+ assert contained_phrase == ('normalized_word', 'class', 'type', 'near')
+
+def test_fetch_existing_words_phrases_housenumber(special_phrases_importer, word_table,
+ temp_db_conn):
+ """
+ Check for the fetch_existing_words_phrases() method.
+ It should return nothing as the term added corresponds
+ to a housenumber term.
+ """
+ with temp_db_conn.cursor() as temp_db_cursor:
+ query ="""
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
+ 'place', 'house', null, 0, 'near');
+ """
+ temp_db_cursor.execute(query)
+
+ special_phrases_importer._fetch_existing_words_phrases()
+ assert not special_phrases_importer.words_phrases_to_delete
+
+def test_fetch_existing_words_phrases_postcode(special_phrases_importer, word_table,
+ temp_db_conn):
+ """
+ Check for the fetch_existing_words_phrases() method.
+ It should return nothing as the term added corresponds
+ to a postcode term.
+ """
+ with temp_db_conn.cursor() as temp_db_cursor:
+ query ="""
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
+ 'place', 'postcode', null, 0, 'near');
+ """
+ temp_db_cursor.execute(query)
+
+ special_phrases_importer._fetch_existing_words_phrases()
+ assert not special_phrases_importer.words_phrases_to_delete
+
+def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_conn):
+ """
+ Check for the fetch_existing_place_classtype_tables() method.
+ It should return the table just created.
+ """
+ with temp_db_conn.cursor() as temp_db_cursor:
+ query = 'CREATE TABLE place_classtype_testclasstypetable()'
+ temp_db_cursor.execute(query)
+
+ special_phrases_importer._fetch_existing_place_classtype_tables()
+ contained_table = special_phrases_importer.table_phrases_to_delete.pop()
+ assert contained_table == 'place_classtype_testclasstypetable'
+
def test_check_sanity_class(special_phrases_importer):
"""
Check for _check_sanity() method.
assert returned == json_file
def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
- word_table, temp_db_conn):
+ temp_db_conn):
"""
Test that _process_amenity() execute well the
getorcreate_amenityoperator() SQL function and that
assert check_amenities_without_op(temp_db_conn)
assert results[class_test] and type_test in results.values()
+def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_phrases,
+ temp_db_conn):
+ """
+ Check for the remove_non_existent_phrases_from_db() method.
+
+ It should remove entries from the word table which are contained
+ in the words_phrases_to_delete set and not those also contained
+ in the words_phrases_still_exist set.
+
+ place_classtype tables contained in table_phrases_to_delete should
+ be deleted.
+ """
+ with temp_db_conn.cursor() as temp_db_cursor:
+ to_delete_phrase_tuple = ('normalized_word', 'class', 'type', 'near')
+ to_keep_phrase_tuple = (
+ 'normalized_word_exists', 'class_exists', 'type_exists', 'near'
+ )
+ special_phrases_importer.words_phrases_to_delete = {
+ to_delete_phrase_tuple,
+ to_keep_phrase_tuple
+ }
+ special_phrases_importer.words_phrases_still_exist = {
+ to_keep_phrase_tuple
+ }
+ special_phrases_importer.table_phrases_to_delete = {
+ 'place_classtype_testclasstypetable_to_delete'
+ }
+
+ query_words = 'SELECT word, class, type, operator FROM word;'
+ query_tables = """
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema='public'
+ AND table_name like 'place_classtype_%';
+ """
+
+ special_phrases_importer._remove_non_existent_phrases_from_db()
+
+ temp_db_cursor.execute(query_words)
+ words_result = temp_db_cursor.fetchall()
+ temp_db_cursor.execute(query_tables)
+ tables_result = temp_db_cursor.fetchall()
+ assert len(words_result) == 1 and words_result[0] == [
+ 'normalized_word_exists', 'class_exists', 'type_exists', 'near'
+ ]
+ assert (len(tables_result) == 1 and
+ tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
+ )
+
def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
- getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
+ getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs, word_table):
"""
Check that the main import_from_wiki() method is well executed.
It should create the place_classtype table, the place_id and centroid indexes,
grant access to the web user and executing the SQL functions for amenities.
"""
+ mock_fetch_existing_words_phrases = MockParamCapture()
+ mock_fetch_existing_place_classtype_tables = MockParamCapture()
+ mock_remove_non_existent_phrases_from_db = MockParamCapture()
+
+ monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._fetch_existing_words_phrases',
+ mock_fetch_existing_words_phrases)
+ monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._fetch_existing_place_classtype_tables',
+ mock_fetch_existing_place_classtype_tables)
+ monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._remove_non_existent_phrases_from_db',
+ mock_remove_non_existent_phrases_from_db)
monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
special_phrases_importer.import_from_wiki(['en'])
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
assert check_amenities_with_op(temp_db_conn)
assert check_amenities_without_op(temp_db_conn)
+ assert mock_fetch_existing_words_phrases.called == 1
+ assert mock_fetch_existing_place_classtype_tables.called == 1
+ assert mock_remove_non_existent_phrases_from_db.called == 1
def mock_get_wiki_content(lang):
"""
yield Path(phpdir)
+@pytest.fixture
+def default_phrases(word_table, temp_db_cursor):
+ temp_db_cursor.execute("""
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
+ 'class', 'type', null, 0, 'near');
+
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word_exists',
+ 'class_exists', 'type_exists', null, 0, 'near');
+
+ CREATE TABLE place_classtype_testclasstypetable_to_delete();
+ CREATE TABLE place_classtype_testclasstypetable_to_keep();""")
+
@pytest.fixture
def make_strandard_name_func(temp_db_cursor):
temp_db_cursor.execute("""
temp_db_cursor.execute('CREATE EXTENSION postgis')
temp_db_cursor.execute('CREATE TABLE place (id INT)')
sqlfile = tmp_path / '1010.sql'
- sqlfile.write_text("""INSERT INTO place values (1)""")
+ sqlfile.write_text("""INSERT INTO place values (1);
+ INSERT INTO non_existant_table values (1);""")
tiger_data.add_tiger_data(dsn, str(tmp_path), threads, def_config, src_dir / 'lib-sql')
assert temp_db_cursor.table_rows('place') == 1
+@pytest.mark.parametrize("threads", (1, 5))
+def test_add_tiger_data_bad_file(dsn, src_dir, def_config, tmp_path, sql_preprocessor,
+ temp_db_cursor, threads, temp_db):
+ temp_db_cursor.execute('CREATE EXTENSION hstore')
+ temp_db_cursor.execute('CREATE EXTENSION postgis')
+ temp_db_cursor.execute('CREATE TABLE place (id INT)')
+ sqlfile = tmp_path / '1010.txt'
+ sqlfile.write_text("""Random text""")
+ tiger_data.add_tiger_data(dsn, str(tmp_path), threads, def_config, src_dir / 'lib-sql')
+
+ assert temp_db_cursor.table_rows('place') == 0
+
@pytest.mark.parametrize("threads", (1, 5))
def test_add_tiger_data_tarfile(dsn, src_dir, def_config, tmp_path,
temp_db_cursor, threads, temp_db, sql_preprocessor):
temp_db_cursor.execute('CREATE EXTENSION postgis')
temp_db_cursor.execute('CREATE TABLE place (id INT)')
sqlfile = tmp_path / '1010.sql'
- sqlfile.write_text("""INSERT INTO place values (1)""")
+ sqlfile.write_text("""INSERT INTO place values (1);
+ INSERT INTO non_existant_table values (1);""")
+ tar = tarfile.open("sample.tar.gz", "w:gz")
+ tar.add(sqlfile)
+ tar.close()
+ tiger_data.add_tiger_data(dsn, str(src_dir / 'sample.tar.gz'), threads, def_config, src_dir / 'lib-sql')
+
+ assert temp_db_cursor.table_rows('place') == 1
+
+@pytest.mark.parametrize("threads", (1, 5))
+def test_add_tiger_data_bad_tarfile(dsn, src_dir, def_config, tmp_path,
+ temp_db_cursor, threads, temp_db, sql_preprocessor):
+ temp_db_cursor.execute('CREATE EXTENSION hstore')
+ temp_db_cursor.execute('CREATE EXTENSION postgis')
+ temp_db_cursor.execute('CREATE TABLE place (id INT)')
+ sqlfile = tmp_path / '1010.txt'
+ sqlfile.write_text("""Random text""")
tar = tarfile.open("sample.tar.gz", "w:gz")
tar.add(sqlfile)
tar.close()
tiger_data.add_tiger_data(dsn, str(src_dir / 'sample.tar.gz'), threads, def_config, src_dir / 'lib-sql')
- assert temp_db_cursor.table_rows('place') == 1
\ No newline at end of file
+ assert temp_db_cursor.table_rows('place') == 0
\ No newline at end of file
# -------------------------------
#
# The webserver should serve the php scripts from the website directory of your
-# [project directory](../admin/import.md#creating-the-project-directory).
+# [project directory](../admin/Import.md#creating-the-project-directory).
# Therefore set up a project directory and populate the website directory:
#
mkdir $USERHOME/nominatim-project
# -------------------------------
#
# The webserver should serve the php scripts from the website directory of your
-# [project directory](../admin/import.md#creating-the-project-directory).
+# [project directory](../admin/Import.md#creating-the-project-directory).
# Therefore set up a project directory and populate the website directory:
#
mkdir $USERHOME/nominatim-project
# ======================
#
# The webserver should serve the php scripts from the website directory of your
-# [project directory](../admin/import.md#creating-the-project-directory).
+# [project directory](../admin/Import.md#creating-the-project-directory).
# Therefore set up a project directory and populate the website directory:
mkdir $USERHOME/nominatim-project
# ======================
#
# The webserver should serve the php scripts from the website directory of your
-# [project directory](../admin/import.md#creating-the-project-directory).
+# [project directory](../admin/Import.md#creating-the-project-directory).
# Therefore set up a project directory and populate the website directory:
mkdir $USERHOME/nominatim-project