1 -- Get tokens used for searching the given place.
3 -- These are the tokens that will be saved in the search_name table.
4 CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
7 SELECT (info->>'names')::INTEGER[]
8 $$ LANGUAGE SQL IMMUTABLE STRICT;
11 -- Get tokens for matching the place name against others.
13 -- This should usually be restricted to full name tokens.
14 CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
17 SELECT (info->>'names')::INTEGER[]
18 $$ LANGUAGE SQL IMMUTABLE STRICT;
21 -- Return the housenumber tokens applicable for the place.
22 CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
25 SELECT (info->>'hnr_tokens')::INTEGER[]
26 $$ LANGUAGE SQL IMMUTABLE STRICT;
29 -- Return the housenumber in the form that it can be matched during search.
30 CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
34 $$ LANGUAGE SQL IMMUTABLE STRICT;
37 CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
40 SELECT info->>'street' is not null;
41 $$ LANGUAGE SQL IMMUTABLE;
44 CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
47 SELECT info->>'place_match' is not null;
48 $$ LANGUAGE SQL IMMUTABLE;
51 CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
54 SELECT (info->>'street')::INTEGER[] && street_tokens
55 $$ LANGUAGE SQL IMMUTABLE STRICT;
58 CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
61 SELECT (info->>'place_match')::INTEGER[] && place_tokens
62 $$ LANGUAGE SQL IMMUTABLE STRICT;
65 CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
68 SELECT (info->>'place_search')::INTEGER[]
69 $$ LANGUAGE SQL IMMUTABLE STRICT;
72 CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
75 SELECT * FROM jsonb_object_keys(info->'addr');
76 $$ LANGUAGE SQL IMMUTABLE STRICT;
79 CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
82 SELECT (info->'addr'->key->>0)::INTEGER[];
83 $$ LANGUAGE SQL IMMUTABLE STRICT;
86 CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
89 SELECT (info->'addr'->key->>1)::INTEGER[] && tokens;
90 $$ LANGUAGE SQL IMMUTABLE STRICT;
93 CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
96 SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
97 $$ LANGUAGE SQL IMMUTABLE STRICT;
100 -- Return token info that should be saved permanently in the database.
101 CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
105 $$ LANGUAGE SQL IMMUTABLE STRICT;
107 --------------- private functions ----------------------------------------------
109 -- Functions for term normalisation and access to the 'word' table.
111 CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
112 AS '{{ modulepath }}/nominatim.so', 'transliteration'
113 LANGUAGE c IMMUTABLE STRICT;
116 CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
117 AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
118 LANGUAGE c IMMUTABLE STRICT;
121 CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
126 o := public.gettokenstring(public.transliteration(name));
127 RETURN trim(substr(o,1,length(o)));
130 LANGUAGE plpgsql IMMUTABLE;
132 -- returns NULL if the word is too common
133 CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
138 return_word_id INTEGER;
141 lookup_token := trim(lookup_word);
142 SELECT min(word_id), max(search_name_count) FROM word
143 WHERE word_token = lookup_token and class is null and type is null
144 INTO return_word_id, count;
145 IF return_word_id IS NULL THEN
146 return_word_id := nextval('seq_word');
147 INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
149 IF count > {{ max_word_freq }} THEN
150 return_word_id := NULL;
153 RETURN return_word_id;
159 -- Create housenumber tokens from an OSM addr:housenumber.
160 -- The housnumber is split at comma and semicolon as necessary.
161 -- The function returns the normalized form of the housenumber suitable
163 CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
168 SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
169 INTO normtext, tokens
170 FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
171 FROM (SELECT make_standard_name(h) as lookup_word
172 FROM unnest(housenumbers) h) x) y;
174 $$ LANGUAGE plpgsql STABLE STRICT;
177 CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
182 return_word_id INTEGER;
184 lookup_token := ' ' || trim(lookup_word);
185 SELECT min(word_id) FROM word
186 WHERE word_token = lookup_token and class='place' and type='house'
188 IF return_word_id IS NULL THEN
189 return_word_id := nextval('seq_word');
190 INSERT INTO word VALUES (return_word_id, lookup_token, null,
191 'place', 'house', null, 0);
193 RETURN return_word_id;
199 CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
205 return_word_id INTEGER;
207 lookup_token := ' ' || make_standard_name(postcode);
209 SELECT word_id FROM word
210 WHERE word_token = lookup_token and word = postcode
211 and class='place' and type='postcode'
216 INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
217 'place', 'postcode', null, 0);
224 CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
229 nospace_lookup_token TEXT;
230 return_word_id INTEGER;
232 lookup_token := ' '||trim(lookup_word);
233 SELECT min(word_id) FROM word
234 WHERE word_token = lookup_token and class is null and type is null
236 IF return_word_id IS NULL THEN
237 return_word_id := nextval('seq_word');
238 INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
239 null, null, null, 0);
241 RETURN return_word_id;
247 -- Normalize a string and lookup its word ids (partial words).
248 CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
254 return_word_id INTEGER[];
258 words := string_to_array(make_standard_name(lookup_word), ' ');
259 IF array_upper(words, 1) IS NOT NULL THEN
260 FOR j IN 1..array_upper(words, 1) LOOP
261 IF (words[j] != '') THEN
262 SELECT array_agg(word_id) INTO word_ids
264 WHERE word_token = words[j] and class is null and type is null;
266 IF word_ids IS NULL THEN
267 id := nextval('seq_word');
268 INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
269 return_word_id := return_word_id || id;
271 return_word_id := array_merge(return_word_id, word_ids);
277 RETURN return_word_id;
283 -- Normalize a string and look up its name ids (full words).
284 CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
289 return_word_ids INTEGER[];
291 lookup_token := ' '|| make_standard_name(lookup_word);
292 SELECT array_agg(word_id) FROM word
293 WHERE word_token = lookup_token and class is null and type is null
294 INTO return_word_ids;
295 RETURN return_word_ids;
298 LANGUAGE plpgsql STABLE STRICT;
301 CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
312 result := '{}'::INTEGER[];
314 FOR value IN SELECT unnest(regexp_split_to_array(svals(src), E'[,;]')) LOOP
316 s := make_standard_name(value);
317 w := getorcreate_name_id(s, value);
319 IF not(ARRAY[w] <@ result) THEN
320 result := result || w;
323 -- partial single-word terms
324 words := string_to_array(s, ' ');
325 IF array_upper(words, 1) IS NOT NULL THEN
326 FOR j IN 1..array_upper(words, 1) LOOP
327 IF (words[j] != '') THEN
328 w = getorcreate_word_id(words[j]);
329 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
330 result := result || w;
336 -- consider parts before an opening braket a full word as well
337 words := regexp_split_to_array(value, E'[(]');
338 IF array_upper(words, 1) > 1 THEN
339 s := make_standard_name(words[1]);
341 w := getorcreate_name_id(s, words[1]);
342 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
343 result := result || w;
348 s := regexp_replace(value, '市$', '');
350 s := make_standard_name(s);
352 w := getorcreate_name_id(s, value);
353 IF NOT (ARRAY[w] <@ result) THEN
354 result := result || w;
367 CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
377 s := make_standard_name(src);
378 w := getorcreate_name_id(s, src);
380 w := getorcreate_word_id(s);
382 words := string_to_array(s, ' ');
383 IF array_upper(words, 1) IS NOT NULL THEN
384 FOR j IN 1..array_upper(words, 1) LOOP
385 IF (words[j] != '') THEN
386 w := getorcreate_word_id(words[j]);
391 words := regexp_split_to_array(src, E'[,;()]');
392 IF array_upper(words, 1) != 1 THEN
393 FOR j IN 1..array_upper(words, 1) LOOP
394 s := make_standard_name(words[j]);
396 w := getorcreate_word_id(s);
401 s := regexp_replace(src, '市$', '');
403 s := make_standard_name(s);
405 w := getorcreate_name_id(s, src);