def _process_place_address(self, token_info, address):
hnrs = []
addr_terms = []
+ streets = []
for item in address:
if item.kind == 'postcode':
self._add_postcode(item.name)
elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
hnrs.append(item.name)
elif item.kind == 'street':
- token_info.add_street(self._compute_partial_tokens(item.name))
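+                # match streets via their full name token only; names
+                # without a full token in the word table are skipped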
+ token = self._retrieve_full_token(item.name)
+ if token:
+ streets.append(token)
elif item.kind == 'place':
token_info.add_place(self._compute_partial_tokens(item.name))
            elif not item.kind.startswith('_') and \
                 item.kind not in ('country', 'full'):
                addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))

if addr_terms:
token_info.add_address_terms(addr_terms)
+ if streets:
+ token_info.add_street(streets)
+
def _compute_partial_tokens(self, name):
""" Normalize the given term, split it into partial words and return
return tokens
+ def _retrieve_full_token(self, name):
+ """ Get the full name token for the given name, if it exists.
+            The name is only retrieved for the standard analyser.
+ """
+ norm_name = self._normalized(name)
+
+ # return cached if possible
+ if norm_name in self._cache.fulls:
+ return self._cache.fulls[norm_name]
+
+        # otherwise check the name cache filled by _compute_name_tokens(),
+        # which stores (full token, partial tokens) per normalized name
+ full, _ = self._cache.names.get(norm_name, (None, None))
+
+ if full is None:
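+            # token not in any cache yet - look it up in the word table,
+            # where type 'W' marks full-name entries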
+ with self.conn.cursor() as cur:
+ cur.execute("SELECT word_id FROM word WHERE word = %s and type = 'W' LIMIT 1",
+ (norm_name, ))
+ if cur.rowcount > 0:
+ full = cur.fetchone()[0]
+
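+        # cache misses as well (full stays None), so repeated lookups for
+        # unknown street names need no further database queries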
+ self._cache.fulls[norm_name] = full
+
+ return full
+
+
def _compute_name_tokens(self, names):
""" Computes the full name and partial name tokens for the given
            dictionary of names.
        """
def add_street(self, tokens):
""" Add addr:street match terms.
"""
- if tokens:
- self.data['street'] = self._mk_array(tokens)
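+            # the check for emptiness now happens in the caller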
+ self.data['street'] = self._mk_array(tokens)
def add_place(self, tokens):
def __init__(self):
self.names = {}
self.partials = {}
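+        # full-name token per normalized name; None records a known miss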
+ self.fulls = {}
self.postcodes = set()
self.housenumbers = {}
def test_process_place_street(self):
+ self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
info = self.process_address(street='Grand Road')
- assert eval(info['street']) == self.name_token_set('GRAND', 'ROAD')
+ assert eval(info['street']) == self.name_token_set('#Grand Road')
+
+
+ def test_process_place_nonexisting_street(self):
+ info = self.process_address(street='Grand Road')
+
+ assert 'street' not in info
+
+
+ def test_process_place_multiple_street_tags(self):
+ self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road',
+ 'ref': '05989'}}))
+ info = self.process_address(**{'street': 'Grand Road',
+ 'street:sym_ul': '05989'})
+
+ assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989')
    def test_process_place_street_empty(self):
        info = self.process_address(street='🜵')

        assert 'street' not in info
+ def test_process_place_street_from_cache(self):
+ self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
+ self.process_address(street='Grand Road')
+
+ # request address again
+ info = self.process_address(street='Grand Road')
+
+ assert eval(info['street']) == self.name_token_set('#Grand Road')
+
+
def test_process_place_place(self):
info = self.process_address(place='Honu Lulu')