1 -- Get tokens used for searching the given place.
3 -- These are the tokens that will be saved in the search_name table.
4 CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
7 SELECT (info->>'names')::INTEGER[]
8 $$ LANGUAGE SQL IMMUTABLE STRICT;
11 -- Get tokens for matching the place name against others.
13 -- This should usually be restricted to full name tokens.
14 CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
17 SELECT (info->>'names')::INTEGER[]
18 $$ LANGUAGE SQL IMMUTABLE STRICT;
21 -- Return the housenumber tokens applicable for the place.
22 CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
25 SELECT (info->>'hnr_tokens')::INTEGER[]
26 $$ LANGUAGE SQL IMMUTABLE STRICT;
29 -- Return the housenumber in the form that it can be matched during search.
30 CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
34 $$ LANGUAGE SQL IMMUTABLE STRICT;
37 -- Return token info that should be saved permanently in the database.
38 CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
42 $$ LANGUAGE SQL IMMUTABLE STRICT;
44 --------------- private functions ----------------------------------------------
46 -- Functions for term normalisation and access to the 'word' table.
48 CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
49 AS '{{ modulepath }}/nominatim.so', 'transliteration'
50 LANGUAGE c IMMUTABLE STRICT;
53 CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
54 AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
55 LANGUAGE c IMMUTABLE STRICT;
58 CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
63 o := public.gettokenstring(public.transliteration(name));
64 RETURN trim(substr(o,1,length(o)));
67 LANGUAGE plpgsql IMMUTABLE;
69 -- returns NULL if the word is too common
70 CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
75 return_word_id INTEGER;
78 lookup_token := trim(lookup_word);
79 SELECT min(word_id), max(search_name_count) FROM word
80 WHERE word_token = lookup_token and class is null and type is null
81 INTO return_word_id, count;
82 IF return_word_id IS NULL THEN
83 return_word_id := nextval('seq_word');
84 INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
86 IF count > {{ max_word_freq }} THEN
87 return_word_id := NULL;
90 RETURN return_word_id;
96 -- Create housenumber tokens from an OSM addr:housenumber.
97 -- The housnumber is split at comma and semicolon as necessary.
98 -- The function returns the normalized form of the housenumber suitable
100 CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
105 SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
106 INTO normtext, tokens
107 FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
108 FROM (SELECT make_standard_name(h) as lookup_word
109 FROM unnest(housenumbers) h) x) y;
111 $$ LANGUAGE plpgsql STABLE STRICT;
114 CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
119 return_word_id INTEGER;
121 lookup_token := ' ' || trim(lookup_word);
122 SELECT min(word_id) FROM word
123 WHERE word_token = lookup_token and class='place' and type='house'
125 IF return_word_id IS NULL THEN
126 return_word_id := nextval('seq_word');
127 INSERT INTO word VALUES (return_word_id, lookup_token, null,
128 'place', 'house', null, 0);
130 RETURN return_word_id;
136 CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
142 return_word_id INTEGER;
144 lookup_word := upper(trim(postcode));
145 lookup_token := ' ' || make_standard_name(lookup_word);
146 SELECT min(word_id) FROM word
147 WHERE word_token = lookup_token and word = lookup_word
148 and class='place' and type='postcode'
150 IF return_word_id IS NULL THEN
151 return_word_id := nextval('seq_word');
152 INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
153 'place', 'postcode', null, 0);
155 RETURN return_word_id;
161 CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
162 lookup_country_code varchar(2))
167 return_word_id INTEGER;
169 lookup_token := ' '||trim(lookup_word);
170 SELECT min(word_id) FROM word
171 WHERE word_token = lookup_token and country_code=lookup_country_code
173 IF return_word_id IS NULL THEN
174 return_word_id := nextval('seq_word');
175 INSERT INTO word VALUES (return_word_id, lookup_token, null,
176 null, null, lookup_country_code, 0);
178 RETURN return_word_id;
184 CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
185 lookup_class text, lookup_type text)
190 return_word_id INTEGER;
192 lookup_token := ' '||trim(lookup_word);
193 SELECT min(word_id) FROM word
194 WHERE word_token = lookup_token and word = normalized_word
195 and class = lookup_class and type = lookup_type
197 IF return_word_id IS NULL THEN
198 return_word_id := nextval('seq_word');
199 INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
200 lookup_class, lookup_type, null, 0);
202 RETURN return_word_id;
208 CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
209 normalized_word TEXT,
217 return_word_id INTEGER;
219 lookup_token := ' '||trim(lookup_word);
220 SELECT min(word_id) FROM word
221 WHERE word_token = lookup_token and word = normalized_word
222 and class = lookup_class and type = lookup_type and operator = op
224 IF return_word_id IS NULL THEN
225 return_word_id := nextval('seq_word');
226 INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
227 lookup_class, lookup_type, null, 0, op);
229 RETURN return_word_id;
235 CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
240 nospace_lookup_token TEXT;
241 return_word_id INTEGER;
243 lookup_token := ' '||trim(lookup_word);
244 SELECT min(word_id) FROM word
245 WHERE word_token = lookup_token and class is null and type is null
247 IF return_word_id IS NULL THEN
248 return_word_id := nextval('seq_word');
249 INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
250 null, null, null, 0);
252 RETURN return_word_id;
258 CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT)
263 RETURN getorcreate_name_id(lookup_word, '');
268 -- Normalize a string and lookup its word ids (partial words).
269 CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
275 return_word_id INTEGER[];
279 words := string_to_array(make_standard_name(lookup_word), ' ');
280 IF array_upper(words, 1) IS NOT NULL THEN
281 FOR j IN 1..array_upper(words, 1) LOOP
282 IF (words[j] != '') THEN
283 SELECT array_agg(word_id) INTO word_ids
285 WHERE word_token = words[j] and class is null and type is null;
287 IF word_ids IS NULL THEN
288 id := nextval('seq_word');
289 INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
290 return_word_id := return_word_id || id;
292 return_word_id := array_merge(return_word_id, word_ids);
298 RETURN return_word_id;
304 -- Normalize a string and look up its name ids (full words).
305 CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
310 return_word_ids INTEGER[];
312 lookup_token := ' '|| make_standard_name(lookup_word);
313 SELECT array_agg(word_id) FROM word
314 WHERE word_token = lookup_token and class is null and type is null
315 INTO return_word_ids;
316 RETURN return_word_ids;
319 LANGUAGE plpgsql STABLE STRICT;
322 CREATE OR REPLACE FUNCTION create_country(src HSTORE, country_code varchar(2))
332 FOR item IN SELECT (each(src)).* LOOP
334 s := make_standard_name(item.value);
335 w := getorcreate_country(s, country_code);
337 words := regexp_split_to_array(item.value, E'[,;()]');
338 IF array_upper(words, 1) != 1 THEN
339 FOR j IN 1..array_upper(words, 1) LOOP
340 s := make_standard_name(words[j]);
342 w := getorcreate_country(s, country_code);
352 CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
363 result := '{}'::INTEGER[];
365 FOR item IN SELECT (each(src)).* LOOP
367 s := make_standard_name(item.value);
368 w := getorcreate_name_id(s, item.value);
370 IF not(ARRAY[w] <@ result) THEN
371 result := result || w;
374 w := getorcreate_word_id(s);
376 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
377 result := result || w;
380 words := string_to_array(s, ' ');
381 IF array_upper(words, 1) IS NOT NULL THEN
382 FOR j IN 1..array_upper(words, 1) LOOP
383 IF (words[j] != '') THEN
384 w = getorcreate_word_id(words[j]);
385 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
386 result := result || w;
392 words := regexp_split_to_array(item.value, E'[,;()]');
393 IF array_upper(words, 1) != 1 THEN
394 FOR j IN 1..array_upper(words, 1) LOOP
395 s := make_standard_name(words[j]);
397 w := getorcreate_word_id(s);
398 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
399 result := result || w;
405 s := regexp_replace(item.value, '市$', '');
406 IF s != item.value THEN
407 s := make_standard_name(s);
409 w := getorcreate_name_id(s, item.value);
410 IF NOT (ARRAY[w] <@ result) THEN
411 result := result || w;
424 CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
434 s := make_standard_name(src);
435 w := getorcreate_name_id(s, src);
437 w := getorcreate_word_id(s);
439 words := string_to_array(s, ' ');
440 IF array_upper(words, 1) IS NOT NULL THEN
441 FOR j IN 1..array_upper(words, 1) LOOP
442 IF (words[j] != '') THEN
443 w := getorcreate_word_id(words[j]);
448 words := regexp_split_to_array(src, E'[,;()]');
449 IF array_upper(words, 1) != 1 THEN
450 FOR j IN 1..array_upper(words, 1) LOOP
451 s := make_standard_name(words[j]);
453 w := getorcreate_word_id(s);
458 s := regexp_replace(src, '市$', '');
460 s := make_standard_name(s);
462 w := getorcreate_name_id(s, src);