def _mk_analyser(norm=("[[:Punctuation:][:Space:]]+ > ' '",), trans=(':: upper()',),
variants=('~gasse -> gasse', 'street => st', ),
- sanitizers=[], with_housenumber=False):
+ sanitizers=[], with_housenumber=False,
+ with_postcode=False):
cfgstr = {'normalization': list(norm),
'sanitizers': sanitizers,
'transliteration': list(trans),
if with_housenumber:
cfgstr['token-analysis'].append({'id': '@housenumber',
'analyzer': 'housenumbers'})
+ if with_postcode:
+ cfgstr['token-analysis'].append({'id': '@postcode',
+ 'analyzer': 'postcodes'})
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr))
tok.loader = nominatim.tokenizer.icu_rule_loader.ICURuleLoader(test_config)
anl.normalize_postcode('38 Б') == '38 Б'
-def test_update_postcodes_from_db_empty(analyzer, table_factory, word_table):
- table_factory('location_postcode', 'postcode TEXT',
- content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
+class TestPostcodes:
- with analyzer() as anl:
- anl.update_postcodes_from_db()
+ @pytest.fixture(autouse=True)
+ def setup(self, analyzer, sql_functions):
+ sanitizers = [{'step': 'clean-postcodes'}]
+ with analyzer(sanitizers=sanitizers, with_postcode=True) as anl:
+ self.analyzer = anl
+ yield anl
- assert word_table.count() == 3
- assert word_table.get_postcodes() == {'1234', '12 34', 'AB23'}
+ def process_postcode(self, cc, postcode):
+ return self.analyzer.process_place(PlaceInfo({'country_code': cc,
+ 'address': {'postcode': postcode}}))
-def test_update_postcodes_from_db_add_and_remove(analyzer, table_factory, word_table):
- table_factory('location_postcode', 'postcode TEXT',
- content=(('1234',), ('45BC', ), ('XX45', )))
- word_table.add_postcode(' 1234', '1234')
- word_table.add_postcode(' 5678', '5678')
- with analyzer() as anl:
- anl.update_postcodes_from_db()
+ def test_update_postcodes_from_db_empty(self, table_factory, word_table):
+ table_factory('location_postcode', 'country_code TEXT, postcode TEXT',
+ content=(('de', '12345'), ('se', '132 34'),
+ ('bm', 'AB23'), ('fr', '12345')))
+
+ self.analyzer.update_postcodes_from_db()
+
+ assert word_table.count() == 5
+ assert word_table.get_postcodes() == {'12345', '132 34@132 34', 'AB 23@AB 23'}
+
+
+ def test_update_postcodes_from_db_ambigious(self, table_factory, word_table):
+ table_factory('location_postcode', 'country_code TEXT, postcode TEXT',
+ content=(('in', '123456'), ('sg', '123456')))
+
+ self.analyzer.update_postcodes_from_db()
+
+ assert word_table.count() == 3
+ assert word_table.get_postcodes() == {'123456', '123456@123 456'}
+
+
+ def test_update_postcodes_from_db_add_and_remove(self, table_factory, word_table):
+ table_factory('location_postcode', 'country_code TEXT, postcode TEXT',
+ content=(('ch', '1234'), ('bm', 'BC 45'), ('bm', 'XX45')))
+ word_table.add_postcode(' 1234', '1234')
+ word_table.add_postcode(' 5678', '5678')
+
+ self.analyzer.update_postcodes_from_db()
+
+ assert word_table.count() == 5
+ assert word_table.get_postcodes() == {'1234', 'BC 45@BC 45', 'XX 45@XX 45'}
+
+
+ def test_process_place_postcode_simple(self, word_table):
+ info = self.process_postcode('de', '12345')
+
+ assert info['postcode'] == '12345'
+
+ assert word_table.get_postcodes() == {'12345', }
+
+
+ def test_process_place_postcode_with_space(self, word_table):
+ info = self.process_postcode('in', '123 567')
+
+ assert info['postcode'] == '123567'
+
+ assert word_table.get_postcodes() == {'123567@123 567', }
- assert word_table.count() == 3
- assert word_table.get_postcodes() == {'1234', '45BC', 'XX45'}
def test_update_special_phrase_empty_table(analyzer, word_table):