`nominatim refresh --wiki-data --importance`. Updating importances for
a planet can take a couple of hours.
- ### Great Britain, USA postcodes
+ ### External postcodes
- Nominatim can use postcodes from an external source to improve searches that
- involve a GB or US postcode. This data can be optionally downloaded into the
- project directory:
+ Nominatim can use postcodes from an external source to improve searching with
+ postcodes. We provide precomputed postcode sets for the US (using TIGER data)
+ and the UK (using the [CodePoint OpenData set](https://osdatahub.os.uk/downloads/open/CodePointOpen)).
+ This data can be optionally downloaded into the project directory:
cd $PROJECT_DIR
- wget https://www.nominatim.org/data/gb_postcode_data.sql.gz
- wget https://www.nominatim.org/data/us_postcode_data.sql.gz
+ wget https://www.nominatim.org/data/gb_postcodes.csv.gz
+ wget https://www.nominatim.org/data/us_postcodes.csv.gz
+
+ You can also add your own custom postcode sources; see
+ [Customization of postcodes](Customization.md#external-postcode-data).
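+
+ For illustration only (the authoritative format is described in the
+ customization chapter linked above), such a file is a gzipped CSV named
+ after its country code and placed in the project directory, e.g. a
+ hypothetical `de_postcodes.csv.gz` with invented values:
+
+     postcode,lat,lon
+     01067,51.0600,13.7200
+     01069,51.0390,13.7440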
## Choosing the data to import
`http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
+ Note that search queries are not supported for reverse-only imports. You can
+ run a reverse query instead, e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
+
To run Nominatim via webservers like Apache or nginx, please read the
[Deployment chapter](Deployment.md).
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to import these key phrases like this:
- nominatim special-phrases --import-from-wiki
+```sh
+nominatim special-phrases --import-from-wiki
+```
Note that this command downloads the phrases from the wiki link above. You
need internet access for this step.
+
+You can also import special phrases from a CSV file with the following
+command:
+
+```sh
+nominatim special-phrases --import-from-csv <csv file>
+```
+
+Note that the two previous commands replace the phrases in your database.
+This means that if you import phrases from a CSV file, only the phrases
+present in the CSV file will be kept in the database. All other phrases will
+be removed.
+
+If you only want to add new phrases without updating the existing ones, add
+the `--no-replace` argument to the import command. For example:
+
+```sh
+nominatim special-phrases --import-from-csv <csv file> --no-replace
+```
+
+This will add the phrases from the CSV file to the database without removing
+any existing ones.
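+
+As a sketch, such a CSV file lists one phrase per line with the same four
+fields used by the wiki phrases (phrase, class, type and operator); check
+the documentation of the CSV importer for the authoritative format:
+
+```text
+phrase,class,type,operator
+Zip Line,aerialway,zip_line,-
+Zip Lines,aerialway,zip_line,-
+```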
+
+## Installing TIGER housenumber data for the US
+
+Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
+address set to complement the OSM house number data in the US. You can add
+TIGER data to your own Nominatim instance by following these steps. The
+entire US adds about 10GB to your database.
+
+ 1. Get preprocessed TIGER 2020 data:
+
+ cd $PROJECT_DIR
+ wget https://nominatim.org/data/tiger2020-nominatim-preprocessed.tar.gz
+
+ 2. Import the data into your Nominatim database:
+
+ nominatim add-data --tiger-data tiger2020-nominatim-preprocessed.tar.gz
+
+ 3. Enable use of the TIGER data in your `.env` by adding:
+
+ echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
+
+ 4. Apply the new settings:
+
+ nominatim refresh --functions
+
+
+See the [developer's guide](../develop/data-sources.md#us-census-tiger) for more
+information on how the data was preprocessed.
+
"""
return self.normalizer.transliterate(phrase)
+ @staticmethod
+ def normalize_postcode(postcode):
+ """ Convert the postcode to a standardized form.
+
+ This function must yield exactly the same result as the SQL function
+ 'token_normalized_postcode()'.
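+
+ For example, ' ab1 2cd ' becomes 'AB1 2CD'.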
+ """
+ return postcode.strip().upper()
+
+
@functools.lru_cache(maxsize=1024)
def make_standard_word(self, name):
""" Create the normalised version of the input.
return self.transliterator.transliterate(hnr)
- def add_postcodes_from_db(self):
- """ Add postcodes from the location_postcode table to the word table.
+ def update_postcodes_from_db(self):
+ """ Update postcode tokens in the word table from the location_postcode
+ table.
"""
+ to_delete = []
copystr = io.StringIO()
with self.conn.cursor() as cur:
- cur.execute("SELECT distinct(postcode) FROM location_postcode")
- for (postcode, ) in cur:
- copystr.write(postcode)
- copystr.write('\t ')
- copystr.write(self.transliterator.transliterate(postcode))
- copystr.write('\tplace\tpostcode\t0\n')
-
- copystr.seek(0)
- cur.copy_from(copystr, 'word',
- columns=['word', 'word_token', 'class', 'type',
- 'search_name_count'])
- # Don't really need an ID for postcodes....
- # cur.execute("""UPDATE word SET word_id = nextval('seq_word')
- # WHERE word_id is null and type = 'postcode'""")
+ # This finds the rows in location_postcode and word that are
+ # missing from the other table.
+ cur.execute("""SELECT * FROM
+ (SELECT pc, word FROM
+ (SELECT distinct(postcode) as pc FROM location_postcode) p
+ FULL JOIN
+ (SELECT word FROM word
+ WHERE class ='place' and type = 'postcode') w
+ ON pc = word) x
+ WHERE pc is null or word is null""")
+
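+ # A postcode missing from the word table is added below; a word
+ # entry without a matching postcode is queued for deletion.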
+ for postcode, word in cur:
+ if postcode is None:
+ to_delete.append(word)
+ else:
+ copystr.write(postcode)
+ copystr.write('\t ')
+ copystr.write(self.transliterator.transliterate(postcode))
+ copystr.write('\tplace\tpostcode\t0\n')
+
+ if to_delete:
+ cur.execute("""DELETE FROM WORD
+ WHERE class ='place' and type = 'postcode'
+ and word = any(%s)
+ """, (to_delete, ))
+
+ if copystr.getvalue():
+ copystr.seek(0)
+ cur.copy_from(copystr, 'word',
+ columns=['word', 'word_token', 'class', 'type',
+ 'search_name_count'])
- def update_special_phrases(self, phrases):
+ def update_special_phrases(self, phrases, should_replace):
""" Replace the search index for special phrases with the new phrases.
"""
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
columns=['word', 'word_token', 'class', 'type',
'operator', 'search_name_count'])
- if to_delete:
+ if to_delete and should_replace:
psycopg2.extras.execute_values(
cur,
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
def _add_postcode(self, postcode):
""" Make sure the normalized postcode is present in the word table.
"""
- if re.search(r'[:,;]', postcode) is None and not postcode in self._cache.postcodes:
- term = self.make_standard_word(postcode)
- if not term:
- return
-
- with self.conn.cursor() as cur:
- # no word_id needed for postcodes
- cur.execute("""INSERT INTO word (word, word_token, class, type,
- search_name_count)
- (SELECT pc, %s, 'place', 'postcode', 0
- FROM (VALUES (%s)) as v(pc)
- WHERE NOT EXISTS
- (SELECT * FROM word
- WHERE word = pc and class='place' and type='postcode'))
- """, (' ' + term, postcode))
- self._cache.postcodes.add(postcode)
+ if re.search(r'[:,;]', postcode) is None:
+ postcode = self.normalize_postcode(postcode)
+
+ if postcode not in self._cache.postcodes:
+ term = self.make_standard_word(postcode)
+ if not term:
+ return
+
+ with self.conn.cursor() as cur:
+ # no word_id needed for postcodes
+ cur.execute("""INSERT INTO word (word, word_token, class, type,
+ search_name_count)
+ (SELECT pc, %s, 'place', 'postcode', 0
+ FROM (VALUES (%s)) as v(pc)
+ WHERE NOT EXISTS
+ (SELECT * FROM word
+ WHERE word = pc and class='place' and type='postcode'))
+ """, (' ' + term, postcode))
+ self._cache.postcodes.add(postcode)
@staticmethod
def _split_housenumbers(hnrs):
return self.normalizer.transliterate(phrase)
- def add_postcodes_from_db(self):
- """ Add postcodes from the location_postcode table to the word table.
+ @staticmethod
+ def normalize_postcode(postcode):
+ """ Convert the postcode to a standardized form.
+
+ This function must yield exactly the same result as the SQL function
+ 'token_normalized_postcode()'.
+ """
+ return postcode.strip().upper()
+
+
+ def update_postcodes_from_db(self):
+ """ Update postcode tokens in the word table from the location_postcode
+ table.
"""
with self.conn.cursor() as cur:
- cur.execute("""SELECT count(create_postcode_id(pc))
- FROM (SELECT distinct(postcode) as pc
- FROM location_postcode) x""")
+ # This finds the rows in location_postcode and word that are
+ # missing from the other table.
+ cur.execute("""SELECT * FROM
+ (SELECT pc, word FROM
+ (SELECT distinct(postcode) as pc FROM location_postcode) p
+ FULL JOIN
+ (SELECT word FROM word
+ WHERE class ='place' and type = 'postcode') w
+ ON pc = word) x
+ WHERE pc is null or word is null""")
+
+ to_delete = []
+ to_add = []
+
+ for postcode, word in cur:
+ if postcode is None:
+ to_delete.append(word)
+ else:
+ to_add.append(postcode)
+
+ if to_delete:
+ cur.execute("""DELETE FROM WORD
+ WHERE class ='place' and type = 'postcode'
+ and word = any(%s)
+ """, (to_delete, ))
+ if to_add:
+ cur.execute("""SELECT count(create_postcode_id(pc))
+ FROM unnest(%s) as pc
+ """, (to_add, ))
+
- def update_special_phrases(self, phrases):
+ def update_special_phrases(self, phrases, should_replace):
""" Replace the search index for special phrases with the new phrases.
"""
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
FROM (VALUES %s) as v(name, class, type, op))""",
to_add)
- if to_delete:
+ if to_delete and should_replace:
psycopg2.extras.execute_values(
cur,
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
def _add_postcode(self, postcode):
""" Make sure the normalized postcode is present in the word table.
"""
- def _create_postcode_from_db(pcode):
- with self.conn.cursor() as cur:
- cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
-
if re.search(r'[:,;]', postcode) is None:
- self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
+ self._cache.add_postcode(self.conn, self.normalize_postcode(postcode))
class _TokenInfo:
FROM generate_series(1, 100) as i""")
self._cached_housenumbers = {str(r[0]) : r[1] for r in cur}
- # Get postcodes that are already saved
- postcodes = OrderedDict()
- with conn.cursor() as cur:
- cur.execute("""SELECT word FROM word
- WHERE class ='place' and type = 'postcode'""")
- for row in cur:
- postcodes[row[0]] = None
- self.postcodes = _LRU(maxsize=32, init_data=postcodes)
+ # For postcodes remember the ones that have already been added
+ self.postcodes = set()
def get_housenumber(self, number):
""" Get a housenumber token from the cache.
"""
return self._cached_housenumbers.get(number)
+
+
+ def add_postcode(self, conn, postcode):
+ """ Make sure the given postcode is in the database.
+ """
+ if postcode not in self.postcodes:
+ with conn.cursor() as cur:
+ cur.execute('SELECT create_postcode_id(%s)', (postcode, ))
+ self.postcodes.add(postcode)
from nominatim import cli
from nominatim.config import Configuration
+ from nominatim.db.connection import _Connection
from nominatim.tools import refresh
from nominatim.tokenizer import factory as tokenizer_factory
from steps.utils import run_script
dbargs['user'] = self.db_user
if self.db_pass:
dbargs['password'] = self.db_pass
- conn = psycopg2.connect(**dbargs)
+ conn = psycopg2.connect(connection_factory=_Connection, **dbargs)
return conn
def next_code_coverage_file(self):
self.website_dir.cleanup()
self.website_dir = tempfile.TemporaryDirectory()
+
+ try:
+ conn = self.connect_database(dbname)
+ except psycopg2.OperationalError:
+ conn = False
refresh.setup_website(Path(self.website_dir.name) / 'website',
- self.get_test_config())
+ self.get_test_config(), conn)
def get_test_config(self):
phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
else:
- # XXX Temporary use the wiki while there is no CSV import
- # available.
- self.test_env['NOMINATIM_LANGUAGES'] = 'en'
- self.run_nominatim('special-phrases', '--import-from-wiki')
- del self.test_env['NOMINATIM_LANGUAGES']
+ csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
+ self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
except:
self.db_drop_database(self.api_test_db)
raise
""" Setup a test against a fresh, empty test database.
"""
self.setup_template_db()
- self.write_nominatim_config(self.test_db)
conn = self.connect_database(self.template_db)
conn.set_isolation_level(0)
cur = conn.cursor()
cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db))
cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
conn.close()
+ self.write_nominatim_config(self.test_db)
context.db = self.connect_database(self.test_db)
context.db.autocommit = True
psycopg2.extras.register_hstore(context.db, globally=False)
def close(self):
pass
- def add_postcodes_from_db(self):
+ def normalize_postcode(self, postcode):
+ return postcode
+
+ def update_postcodes_from_db(self):
pass
- def update_special_phrases(self, phrases):
+ def update_special_phrases(self, phrases, should_replace):
self.analyser_cache['special_phrases'] = phrases
def add_country_names(self, code, names):
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
- mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
+ mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mock_func_factory(nominatim.db.properties, 'set_property')
mock_func_factory(nominatim.tools.database_import, 'load_data'),
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
- mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
+ mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mock_func_factory(nominatim.db.properties, 'set_property')
assert bnd_mock.called == do_bnds
assert rank_mock.called == do_ranks
-def test_special_phrases_command(temp_db, mock_func_factory, tokenizer_mock):
- func = mock_func_factory(nominatim.clicmd.special_phrases.SpecialPhrasesImporter, 'import_from_wiki')
+@pytest.mark.parametrize("no_replace", [True, False])
+def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
+ func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
- call_nominatim('special-phrases', '--import-from-wiki')
+ if no_replace:
+ call_nominatim('special-phrases', '--import-from-wiki', '--no-replace')
+ else:
+ call_nominatim('special-phrases', '--import-from-wiki')
+
+ assert func.called == 1
+
+@pytest.mark.parametrize("no_replace", [True, False])
+def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
+ func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
+ testdata = SRC_DIR / 'test' / 'testdb'
+ csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
+
+ if no_replace:
+ call_nominatim('special-phrases', '--import-from-csv', csv_path, '--no-replace')
+ else:
+ call_nominatim('special-phrases', '--import-from-csv', csv_path)
assert func.called == 1
@pytest.mark.parametrize("command,func", [
- ('postcodes', 'update_postcodes'),
('word-counts', 'recompute_word_counts'),
('address-levels', 'load_address_levels_from_file'),
('wiki-data', 'import_wikipedia_articles'),
('importance', 'recompute_importance'),
('website', 'setup_website'),
])
- def test_refresh_command(mock_func_factory, temp_db, command, func):
+ def test_refresh_command(mock_func_factory, temp_db, command, func, tokenizer_mock):
func_mock = mock_func_factory(nominatim.tools.refresh, func)
assert 0 == call_nominatim('refresh', '--' + command)
assert func_mock.called == 1
+ def test_refresh_postcodes(mock_func_factory, temp_db, tokenizer_mock):
+ func_mock = mock_func_factory(nominatim.tools.postcodes, 'update_postcodes')
+ idx_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_postcodes')
+
+ assert 0 == call_nominatim('refresh', '--postcodes')
+ assert func_mock.called == 1
+ assert idx_mock.called == 1
+
def test_refresh_create_functions(mock_func_factory, temp_db, tokenizer_mock):
func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
assert tokenizer_mock.update_sql_functions_called
- def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
+ def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db, tokenizer_mock):
calls = []
monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
lambda *args, **kwargs: calls.append('import') or 0)
@pytest.fixture
- def create_postcode_id(table_factory, temp_db_cursor):
- table_factory('out_postcode_table', 'postcode TEXT')
-
+ def create_postcode_id(temp_db_cursor):
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
RETURNS BOOLEAN AS $$
- INSERT INTO out_postcode_table VALUES (postcode) RETURNING True;
+ INSERT INTO word (word_token, word, class, type)
+ VALUES (' ' || postcode, postcode, 'place', 'postcode')
+ RETURNING True;
$$ LANGUAGE SQL""")
assert analyzer.normalize('TEsT') == 'test'
- def test_add_postcodes_from_db(analyzer, table_factory, temp_db_cursor,
- create_postcode_id):
+ def test_update_postcodes_from_db_empty(analyzer, table_factory, word_table,
+ create_postcode_id):
table_factory('location_postcode', 'postcode TEXT',
content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
- analyzer.add_postcodes_from_db()
+ analyzer.update_postcodes_from_db()
+
+ assert word_table.count() == 3
+ assert word_table.get_postcodes() == {'1234', '12 34', 'AB23'}
+
+
+ def test_update_postcodes_from_db_add_and_remove(analyzer, table_factory, word_table,
+ create_postcode_id):
+ table_factory('location_postcode', 'postcode TEXT',
+ content=(('1234',), ('45BC', ), ('XX45', )))
+ word_table.add_postcode(' 1234', '1234')
+ word_table.add_postcode(' 5678', '5678')
+
+ analyzer.update_postcodes_from_db()
- assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \
- == set((('1234', ), ('12 34', ), ('AB23',)))
+ assert word_table.count() == 3
+ assert word_table.get_postcodes() == {'1234', '45BC', 'XX45'}
- def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
- make_standard_name):
+ def test_update_special_phrase_empty_table(analyzer, word_table, make_standard_name):
analyzer.update_special_phrases([
("König bei", "amenity", "royal", "near"),
("Könige", "amenity", "royal", "-"),
("strasse", "highway", "primary", "in")
- ])
+ ], True)
- assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
- FROM word WHERE class != 'place'""") \
+ assert word_table.get_special() \
== set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'),
(' könige', 'könige', 'amenity', 'royal', None),
(' strasse', 'strasse', 'highway', 'primary', 'in')))
def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
make_standard_name):
- temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
- VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
- (' bar', 'bar', 'highway', 'road', null)""")
+ word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
+ word_table.add_special(' bar', 'bar', 'highway', 'road', None)
- assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+ assert word_table.count_special() == 2
- analyzer.update_special_phrases([])
+ analyzer.update_special_phrases([], True)
- assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+ assert word_table.count_special() == 0
- def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
- make_standard_name):
- temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
- VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
- (' bar', 'bar', 'highway', 'road', null)""")
+def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,
+ make_standard_name):
+ temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+ VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
+ (' bar', 'bar', 'highway', 'road', null)""")
+
+ assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+ analyzer.update_special_phrases([], False)
+
+ assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+
+ def test_update_special_phrase_modify(analyzer, word_table, make_standard_name):
+ word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
+ word_table.add_special(' bar', 'bar', 'highway', 'road', None)
- assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+ assert word_table.count_special() == 2
analyzer.update_special_phrases([
('prison', 'amenity', 'prison', 'in'),
('bar', 'highway', 'road', '-'),
('garden', 'leisure', 'garden', 'near')
- ])
+ ], True)
- assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
- FROM word WHERE class != 'place'""") \
+ assert word_table.get_special() \
== set(((' prison', 'prison', 'amenity', 'prison', 'in'),
(' bar', 'bar', 'highway', 'road', None),
(' garden', 'garden', 'leisure', 'garden', 'near')))
@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345'])
- def test_process_place_postcode(analyzer, temp_db_cursor, create_postcode_id, pc):
-
+ def test_process_place_postcode(analyzer, create_postcode_id, word_table, pc):
info = analyzer.process_place({'address': {'postcode' : pc}})
- assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \
- == set(((pc, ),))
+ assert word_table.get_postcodes() == {pc, }
@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836'])
- def test_process_place_bad_postcode(analyzer, temp_db_cursor, create_postcode_id,
- pc):
-
+ def test_process_place_bad_postcode(analyzer, create_postcode_id, word_table, pc):
info = analyzer.process_place({'address': {'postcode' : pc}})
- assert 0 == temp_db_cursor.scalar("SELECT count(*) from out_postcode_table")
+ assert not word_table.get_postcodes()
@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
assert a._make_standard_hnr('iv') == 'IV'
- def test_add_postcodes_from_db(analyzer, word_table, table_factory, temp_db_cursor):
+ def test_update_postcodes_from_db_empty(analyzer, table_factory, word_table):
table_factory('location_postcode', 'postcode TEXT',
content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
with analyzer() as a:
- a.add_postcodes_from_db()
+ a.update_postcodes_from_db()
- assert temp_db_cursor.row_set("""SELECT word, word_token from word
- """) \
- == set((('1234', ' 1234'), ('12 34', ' 12 34'), ('AB23', ' AB23')))
+ assert word_table.count() == 3
+ assert word_table.get_postcodes() == {'1234', '12 34', 'AB23'}
+
+
+ def test_update_postcodes_from_db_add_and_remove(analyzer, table_factory, word_table):
+ table_factory('location_postcode', 'postcode TEXT',
+ content=(('1234',), ('45BC', ), ('XX45', )))
+ word_table.add_postcode(' 1234', '1234')
+ word_table.add_postcode(' 5678', '5678')
+
+ with analyzer() as a:
+ a.update_postcodes_from_db()
+
+ assert word_table.count() == 3
+ assert word_table.get_postcodes() == {'1234', '45BC', 'XX45'}
def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor):
("König bei", "amenity", "royal", "near"),
("Könige", "amenity", "royal", "-"),
("street", "highway", "primary", "in")
- ])
+ ], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
with analyzer() as a:
- a.update_special_phrases([])
+ a.update_special_phrases([], True)
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor):
+ temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+ VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+ (' BAR', 'bar', 'highway', 'road', null)""")
+
+ assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+ with analyzer() as a:
+ a.update_special_phrases([], False)
+
+ assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'")
+
+
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
('prison', 'amenity', 'prison', 'in'),
('bar', 'highway', 'road', '-'),
('garden', 'leisure', 'garden', 'near')
- ])
+ ], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \
@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345'])
- def test_process_place_postcode(analyzer, temp_db_cursor, pc):
+ def test_process_place_postcode(analyzer, word_table, pc):
with analyzer() as a:
info = a.process_place({'address': {'postcode' : pc}})
- assert temp_db_cursor.row_set("""SELECT word FROM word
- WHERE class = 'place' and type = 'postcode'""") \
- == set(((pc, ),))
+ assert word_table.get_postcodes() == {pc, }
@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836'])
- def test_process_place_bad_postcode(analyzer, temp_db_cursor, pc):
+ def test_process_place_bad_postcode(analyzer, word_table, pc):
with analyzer() as a:
info = a.process_place({'address': {'postcode' : pc}})
- assert 0 == temp_db_cursor.scalar("""SELECT count(*) FROM word
- WHERE class = 'place' and type = 'postcode'""")
+ assert not word_table.get_postcodes()
@pytest.mark.parametrize('hnr', ['123a', '1', '101'])