s TEXT;
w INTEGER;
words TEXT[];
- item RECORD;
+ value TEXT;
j INTEGER;
BEGIN
result := '{}'::INTEGER[];
- FOR item IN SELECT (each(src)).* LOOP
-
- s := make_standard_name(item.value);
- w := getorcreate_name_id(s, item.value);
+ FOR value IN SELECT unnest(regexp_split_to_array(svals(src), E'[,;]')) LOOP
+ -- full name
+ s := make_standard_name(value);
+ w := getorcreate_name_id(s, value);
IF not(ARRAY[w] <@ result) THEN
result := result || w;
END IF;
- w := getorcreate_word_id(s);
-
- IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
-
+ -- partial single-word terms
words := string_to_array(s, ' ');
IF array_upper(words, 1) IS NOT NULL THEN
FOR j IN 1..array_upper(words, 1) LOOP
END LOOP;
END IF;
- words := regexp_split_to_array(item.value, E'[,;()]');
- IF array_upper(words, 1) != 1 THEN
- FOR j IN 1..array_upper(words, 1) LOOP
- s := make_standard_name(words[j]);
- IF s != '' THEN
- w := getorcreate_word_id(s);
- IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
- result := result || w;
- END IF;
+ -- consider parts before an opening bracket a full word as well
+ words := regexp_split_to_array(value, E'[(]');
+ IF array_upper(words, 1) > 1 THEN
+ s := make_standard_name(words[1]);
+ IF s != '' THEN
+ w := getorcreate_name_id(s, words[1]);
+ IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
+ result := result || w;
END IF;
- END LOOP;
+ END IF;
END IF;
- s := regexp_replace(item.value, '市$', '');
- IF s != item.value THEN
+ s := regexp_replace(value, '市$', '');
+ IF s != value THEN
s := make_standard_name(s);
IF s != '' THEN
- w := getorcreate_name_id(s, item.value);
+ w := getorcreate_name_id(s, value);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
names = place.get('name')
if names:
- full_names = set((self.make_standard_word(name) for name in names.values()))
- full_names.discard('')
+ full_names = self._compute_full_names(names)
token_info.add_names(self.conn, full_names)
return token_info.data
+ def _compute_full_names(self, names):
+ """ Return the set of full names to be used with the given
+ dictionary of names, each in the form returned by
+ `make_standard_word()`.
+
+ Every value of `names` is split on ';' and ',' and each part is
+ standardized separately; results that are empty are left out.
+ For a part that contains '(', the text in front of the first
+ '(' is considered as a full name as well.
+ """
+ full_names = set()
+ # a single value may hold several names separated by ';' or ','
+ for name in (n for ns in names.values() for n in re.split('[;,]', ns)):
+ word = self.make_standard_word(name)
+ if word:
+ full_names.add(word)
+
+ # only the first element of the split, i.e. the text before the
+ # first '(', is used below
+ brace_split = name.split('(', 2)
+ if len(brace_split) > 1:
+ word = self.make_standard_word(brace_split[0])
+ if word:
+ full_names.add(word)
+
+ return full_names
+
+
def _add_postcode(self, postcode):
""" Make sure the normalized postcode is present in the word table.
"""
Feature: Creation of search terms
Tests that search_name table is filled correctly
+ Scenario Outline: Comma- and semicolon separated names appear as full names
+ Given the places
+ | osm | class | type | name+alt_name |
+ | N1 | place | city | New York<sep>Big Apple |
+ When importing
+ Then search_name contains
+ | object | name_vector |
+ | N1 | #New York, #Big Apple |
+
+ Examples:
+ | sep |
+ | , |
+ | ; |
+
+ Scenario: Name parts before brackets appear as full names
+ Given the places
+ | osm | class | type | name+name |
+ | N1 | place | city | Halle (Saale) |
+ When importing
+ Then search_name contains
+ | object | name_vector |
+ | N1 | #Halle Saale, #Halle |
+
Scenario: Unnamed POIs have no search entry
Given the scene roads-with-pois
And the places
When importing
Then search_name contains
| object | nameaddress_vector |
- | N1 | Rose Street, Little, Big, Town |
+ | N1 | #Rose Street, rose, Little, Big, Town |
When searching for "23 Rose Street, Little Big Town"
Then results contain
| osm_type | osm_id | name |
def test_process_place_names(analyzer, getorcreate_term_id):
-
with analyzer() as anl:
info = anl.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}})
- assert info['names'] == '{1,2,3,4,5,6}'
+ assert info['names'] == '{1,2,3,4,5}'
+
+
+@pytest.mark.parametrize('sep', [',' , ';'])
+def test_full_names_with_separator(analyzer, getorcreate_term_id, sep):
+ with analyzer() as anl:
+ full_names =
+ anl._compute_full_names({'name' : sep.join(('New York', 'Big Apple'))})
+
+ expect = set((anl.make_standard_word(w) for w in ('New York', 'Big Apple')))
+
+ assert full_names == expect
+
+
+def test_process_place_names_with_bracket(analyzer, getorcreate_term_id):
+ """ Check the name terms that `process_place()` reports for a name
+ containing a bracketed part.
+ """
+ with analyzer() as anl:
+ info = anl.process_place({'name' :
+ {'name' : 'Houseboat (left)'}})
+
+ expect = set((anl.make_standard_word(w) for w in
+ (' houseboat', ' houseboat left', 'houseboat', 'left')))
+
+ # info['names'] is evaluated as a Python expression and the result is
+ # compared with the expected set; presumably it is a brace-delimited
+ # string like the '{1,2,3,4,5}' asserted in test_process_place_names
+ # NOTE(review): ast.literal_eval would avoid executing arbitrary
+ # expressions - confirm it accepts the format before switching
+ assert eval(info['names']) == expect
@pytest.mark.parametrize('pcode', ['12345', 'AB 123', '34-345'])