1 -- Get tokens used for searching the given place.
3 -- These are the tokens that will be saved in the search_name table.
4 CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
7 SELECT (info->>'names')::INTEGER[]
8 $$ LANGUAGE SQL IMMUTABLE STRICT;
11 -- Get tokens for matching the place name against others.
13 -- This should usually be restricted to full name tokens.
14 CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
17 SELECT (info->>'names')::INTEGER[]
18 $$ LANGUAGE SQL IMMUTABLE STRICT;
21 -- Return the housenumber tokens applicable for the place.
22 CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
25 SELECT (info->>'hnr_tokens')::INTEGER[]
26 $$ LANGUAGE SQL IMMUTABLE STRICT;
29 -- Return the housenumber in the form that it can be matched during search.
30 CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
34 $$ LANGUAGE SQL IMMUTABLE STRICT;
37 CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
40 SELECT (info->>'street')::INTEGER[]
41 $$ LANGUAGE SQL IMMUTABLE STRICT;
44 CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
47 SELECT (info->>'place_match')::INTEGER[]
48 $$ LANGUAGE SQL IMMUTABLE STRICT;
51 CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
54 SELECT (info->>'place_search')::INTEGER[]
55 $$ LANGUAGE SQL IMMUTABLE STRICT;
58 DROP TYPE IF EXISTS token_addresstoken CASCADE;
59 CREATE TYPE token_addresstoken AS (
65 CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
66 RETURNS SETOF token_addresstoken
68 SELECT key, (value->>1)::int[] as match_tokens,
69 (value->>0)::int[] as search_tokens
70 FROM jsonb_each(info->'addr');
71 $$ LANGUAGE SQL IMMUTABLE STRICT;
74 CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
77 SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
78 $$ LANGUAGE SQL IMMUTABLE STRICT;
81 -- Return token info that should be saved permanently in the database.
82 CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
86 $$ LANGUAGE SQL IMMUTABLE STRICT;
88 --------------- private functions ----------------------------------------------
90 -- Functions for term normalisation and access to the 'word' table.
92 CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
93 AS '{{ modulepath }}/nominatim.so', 'transliteration'
94 LANGUAGE c IMMUTABLE STRICT;
97 CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
98 AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
99 LANGUAGE c IMMUTABLE STRICT;
102 CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
107 o := public.gettokenstring(public.transliteration(name));
108 RETURN trim(substr(o,1,length(o)));
111 LANGUAGE plpgsql IMMUTABLE;
113 -- returns NULL if the word is too common
114 CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
119 return_word_id INTEGER;
122 lookup_token := trim(lookup_word);
123 SELECT min(word_id), max(search_name_count) FROM word
124 WHERE word_token = lookup_token and class is null and type is null
125 INTO return_word_id, count;
126 IF return_word_id IS NULL THEN
127 return_word_id := nextval('seq_word');
128 INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
130 IF count > {{ max_word_freq }} THEN
131 return_word_id := NULL;
134 RETURN return_word_id;
140 -- Create housenumber tokens from an OSM addr:housenumber.
141 -- The housnumber is split at comma and semicolon as necessary.
142 -- The function returns the normalized form of the housenumber suitable
144 CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
149 SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
150 INTO normtext, tokens
151 FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
152 FROM (SELECT make_standard_name(h) as lookup_word
153 FROM unnest(housenumbers) h) x) y;
155 $$ LANGUAGE plpgsql STABLE STRICT;
158 CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
163 return_word_id INTEGER;
165 lookup_token := ' ' || trim(lookup_word);
166 SELECT min(word_id) FROM word
167 WHERE word_token = lookup_token and class='place' and type='house'
169 IF return_word_id IS NULL THEN
170 return_word_id := nextval('seq_word');
171 INSERT INTO word VALUES (return_word_id, lookup_token, null,
172 'place', 'house', null, 0);
174 RETURN return_word_id;
180 CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
186 return_word_id INTEGER;
188 lookup_token := ' ' || make_standard_name(postcode);
190 SELECT word_id FROM word
191 WHERE word_token = lookup_token and word = postcode
192 and class='place' and type='postcode'
197 INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
198 'place', 'postcode', null, 0);
205 CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
206 lookup_country_code varchar(2))
211 return_word_id INTEGER;
213 lookup_token := ' '||trim(lookup_word);
214 SELECT min(word_id) FROM word
215 WHERE word_token = lookup_token and country_code=lookup_country_code
217 IF return_word_id IS NULL THEN
218 return_word_id := nextval('seq_word');
219 INSERT INTO word VALUES (return_word_id, lookup_token, null,
220 null, null, lookup_country_code, 0);
222 RETURN return_word_id;
228 CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
229 lookup_class text, lookup_type text)
234 return_word_id INTEGER;
236 lookup_token := ' '||trim(lookup_word);
237 SELECT min(word_id) FROM word
238 WHERE word_token = lookup_token and word = normalized_word
239 and class = lookup_class and type = lookup_type
241 IF return_word_id IS NULL THEN
242 return_word_id := nextval('seq_word');
243 INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
244 lookup_class, lookup_type, null, 0);
246 RETURN return_word_id;
252 CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
253 normalized_word TEXT,
261 return_word_id INTEGER;
263 lookup_token := ' '||trim(lookup_word);
264 SELECT min(word_id) FROM word
265 WHERE word_token = lookup_token and word = normalized_word
266 and class = lookup_class and type = lookup_type and operator = op
268 IF return_word_id IS NULL THEN
269 return_word_id := nextval('seq_word');
270 INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
271 lookup_class, lookup_type, null, 0, op);
273 RETURN return_word_id;
279 CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
284 nospace_lookup_token TEXT;
285 return_word_id INTEGER;
287 lookup_token := ' '||trim(lookup_word);
288 SELECT min(word_id) FROM word
289 WHERE word_token = lookup_token and class is null and type is null
291 IF return_word_id IS NULL THEN
292 return_word_id := nextval('seq_word');
293 INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
294 null, null, null, 0);
296 RETURN return_word_id;
302 CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT)
307 RETURN getorcreate_name_id(lookup_word, '');
312 -- Normalize a string and lookup its word ids (partial words).
313 CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
319 return_word_id INTEGER[];
323 words := string_to_array(make_standard_name(lookup_word), ' ');
324 IF array_upper(words, 1) IS NOT NULL THEN
325 FOR j IN 1..array_upper(words, 1) LOOP
326 IF (words[j] != '') THEN
327 SELECT array_agg(word_id) INTO word_ids
329 WHERE word_token = words[j] and class is null and type is null;
331 IF word_ids IS NULL THEN
332 id := nextval('seq_word');
333 INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
334 return_word_id := return_word_id || id;
336 return_word_id := array_merge(return_word_id, word_ids);
342 RETURN return_word_id;
348 -- Normalize a string and look up its name ids (full words).
349 CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
354 return_word_ids INTEGER[];
356 lookup_token := ' '|| make_standard_name(lookup_word);
357 SELECT array_agg(word_id) FROM word
358 WHERE word_token = lookup_token and class is null and type is null
359 INTO return_word_ids;
360 RETURN return_word_ids;
363 LANGUAGE plpgsql STABLE STRICT;
366 CREATE OR REPLACE FUNCTION create_country(src HSTORE, country_code varchar(2))
376 FOR item IN SELECT (each(src)).* LOOP
378 s := make_standard_name(item.value);
379 w := getorcreate_country(s, country_code);
381 words := regexp_split_to_array(item.value, E'[,;()]');
382 IF array_upper(words, 1) != 1 THEN
383 FOR j IN 1..array_upper(words, 1) LOOP
384 s := make_standard_name(words[j]);
386 w := getorcreate_country(s, country_code);
396 CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
407 result := '{}'::INTEGER[];
409 FOR item IN SELECT (each(src)).* LOOP
411 s := make_standard_name(item.value);
412 w := getorcreate_name_id(s, item.value);
414 IF not(ARRAY[w] <@ result) THEN
415 result := result || w;
418 w := getorcreate_word_id(s);
420 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
421 result := result || w;
424 words := string_to_array(s, ' ');
425 IF array_upper(words, 1) IS NOT NULL THEN
426 FOR j IN 1..array_upper(words, 1) LOOP
427 IF (words[j] != '') THEN
428 w = getorcreate_word_id(words[j]);
429 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
430 result := result || w;
436 words := regexp_split_to_array(item.value, E'[,;()]');
437 IF array_upper(words, 1) != 1 THEN
438 FOR j IN 1..array_upper(words, 1) LOOP
439 s := make_standard_name(words[j]);
441 w := getorcreate_word_id(s);
442 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
443 result := result || w;
449 s := regexp_replace(item.value, '市$', '');
450 IF s != item.value THEN
451 s := make_standard_name(s);
453 w := getorcreate_name_id(s, item.value);
454 IF NOT (ARRAY[w] <@ result) THEN
455 result := result || w;
468 CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
478 s := make_standard_name(src);
479 w := getorcreate_name_id(s, src);
481 w := getorcreate_word_id(s);
483 words := string_to_array(s, ' ');
484 IF array_upper(words, 1) IS NOT NULL THEN
485 FOR j IN 1..array_upper(words, 1) LOOP
486 IF (words[j] != '') THEN
487 w := getorcreate_word_id(words[j]);
492 words := regexp_split_to_array(src, E'[,;()]');
493 IF array_upper(words, 1) != 1 THEN
494 FOR j IN 1..array_upper(words, 1) LOOP
495 s := make_standard_name(words[j]);
497 w := getorcreate_word_id(s);
502 s := regexp_replace(src, '市$', '');
504 s := make_standard_name(s);
506 w := getorcreate_name_id(s, src);