1 -- Get tokens used for searching the given place.
3 -- These are the tokens that will be saved in the search_name table.
4 CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
7 SELECT (info->>'names')::INTEGER[]
8 $$ LANGUAGE SQL IMMUTABLE STRICT;
11 -- Get tokens for matching the place name against others.
13 -- This should usually be restricted to full name tokens.
14 CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
17 SELECT (info->>'names')::INTEGER[]
18 $$ LANGUAGE SQL IMMUTABLE STRICT;
21 -- Return the housenumber tokens applicable for the place.
22 CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
25 SELECT (info->>'hnr_tokens')::INTEGER[]
26 $$ LANGUAGE SQL IMMUTABLE STRICT;
29 -- Return the housenumber in the form that it can be matched during search.
30 CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
34 $$ LANGUAGE SQL IMMUTABLE STRICT;
37 CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
40 SELECT (info->>'street')::INTEGER[]
41 $$ LANGUAGE SQL IMMUTABLE STRICT;
44 CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
47 SELECT (info->>'place_match')::INTEGER[]
48 $$ LANGUAGE SQL IMMUTABLE STRICT;
51 CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
54 SELECT (info->>'place_search')::INTEGER[]
55 $$ LANGUAGE SQL IMMUTABLE STRICT;
58 DROP TYPE IF EXISTS token_addresstoken CASCADE;
59 CREATE TYPE token_addresstoken AS (
65 CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
66 RETURNS SETOF token_addresstoken
68 SELECT key, (value->>1)::int[] as match_tokens,
69 (value->>0)::int[] as search_tokens
70 FROM jsonb_each(info->'addr');
71 $$ LANGUAGE SQL IMMUTABLE STRICT;
74 CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
77 SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
78 $$ LANGUAGE SQL IMMUTABLE STRICT;
81 -- Return token info that should be saved permanently in the database.
82 CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
86 $$ LANGUAGE SQL IMMUTABLE STRICT;
88 --------------- private functions ----------------------------------------------
90 -- Functions for term normalisation and access to the 'word' table.
92 CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
93 AS '{{ modulepath }}/nominatim.so', 'transliteration'
94 LANGUAGE c IMMUTABLE STRICT;
97 CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
98 AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
99 LANGUAGE c IMMUTABLE STRICT;
102 CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
107 o := public.gettokenstring(public.transliteration(name));
108 RETURN trim(substr(o,1,length(o)));
111 LANGUAGE plpgsql IMMUTABLE;
113 -- returns NULL if the word is too common
114 CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
119 return_word_id INTEGER;
122 lookup_token := trim(lookup_word);
123 SELECT min(word_id), max(search_name_count) FROM word
124 WHERE word_token = lookup_token and class is null and type is null
125 INTO return_word_id, count;
126 IF return_word_id IS NULL THEN
127 return_word_id := nextval('seq_word');
128 INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
130 IF count > {{ max_word_freq }} THEN
131 return_word_id := NULL;
134 RETURN return_word_id;
140 -- Create housenumber tokens from an OSM addr:housenumber.
141 -- The housnumber is split at comma and semicolon as necessary.
142 -- The function returns the normalized form of the housenumber suitable
144 CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
149 SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
150 INTO normtext, tokens
151 FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
152 FROM (SELECT make_standard_name(h) as lookup_word
153 FROM unnest(housenumbers) h) x) y;
155 $$ LANGUAGE plpgsql STABLE STRICT;
158 CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
163 return_word_id INTEGER;
165 lookup_token := ' ' || trim(lookup_word);
166 SELECT min(word_id) FROM word
167 WHERE word_token = lookup_token and class='place' and type='house'
169 IF return_word_id IS NULL THEN
170 return_word_id := nextval('seq_word');
171 INSERT INTO word VALUES (return_word_id, lookup_token, null,
172 'place', 'house', null, 0);
174 RETURN return_word_id;
180 CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
186 return_word_id INTEGER;
188 lookup_token := ' ' || make_standard_name(postcode);
190 SELECT word_id FROM word
191 WHERE word_token = lookup_token and word = postcode
192 and class='place' and type='postcode'
197 INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
198 'place', 'postcode', null, 0);
205 CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
210 nospace_lookup_token TEXT;
211 return_word_id INTEGER;
213 lookup_token := ' '||trim(lookup_word);
214 SELECT min(word_id) FROM word
215 WHERE word_token = lookup_token and class is null and type is null
217 IF return_word_id IS NULL THEN
218 return_word_id := nextval('seq_word');
219 INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
220 null, null, null, 0);
222 RETURN return_word_id;
228 -- Normalize a string and lookup its word ids (partial words).
229 CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
235 return_word_id INTEGER[];
239 words := string_to_array(make_standard_name(lookup_word), ' ');
240 IF array_upper(words, 1) IS NOT NULL THEN
241 FOR j IN 1..array_upper(words, 1) LOOP
242 IF (words[j] != '') THEN
243 SELECT array_agg(word_id) INTO word_ids
245 WHERE word_token = words[j] and class is null and type is null;
247 IF word_ids IS NULL THEN
248 id := nextval('seq_word');
249 INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
250 return_word_id := return_word_id || id;
252 return_word_id := array_merge(return_word_id, word_ids);
258 RETURN return_word_id;
264 -- Normalize a string and look up its name ids (full words).
265 CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
270 return_word_ids INTEGER[];
272 lookup_token := ' '|| make_standard_name(lookup_word);
273 SELECT array_agg(word_id) FROM word
274 WHERE word_token = lookup_token and class is null and type is null
275 INTO return_word_ids;
276 RETURN return_word_ids;
279 LANGUAGE plpgsql STABLE STRICT;
282 CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
293 result := '{}'::INTEGER[];
295 FOR value IN SELECT unnest(regexp_split_to_array(svals(src), E'[,;]')) LOOP
297 s := make_standard_name(value);
298 w := getorcreate_name_id(s, value);
300 IF not(ARRAY[w] <@ result) THEN
301 result := result || w;
304 -- partial single-word terms
305 words := string_to_array(s, ' ');
306 IF array_upper(words, 1) IS NOT NULL THEN
307 FOR j IN 1..array_upper(words, 1) LOOP
308 IF (words[j] != '') THEN
309 w = getorcreate_word_id(words[j]);
310 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
311 result := result || w;
317 -- consider parts before an opening braket a full word as well
318 words := regexp_split_to_array(value, E'[(]');
319 IF array_upper(words, 1) > 1 THEN
320 s := make_standard_name(words[1]);
322 w := getorcreate_name_id(s, words[1]);
323 IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
324 result := result || w;
329 s := regexp_replace(value, '市$', '');
331 s := make_standard_name(s);
333 w := getorcreate_name_id(s, value);
334 IF NOT (ARRAY[w] <@ result) THEN
335 result := result || w;
348 CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
358 s := make_standard_name(src);
359 w := getorcreate_name_id(s, src);
361 w := getorcreate_word_id(s);
363 words := string_to_array(s, ' ');
364 IF array_upper(words, 1) IS NOT NULL THEN
365 FOR j IN 1..array_upper(words, 1) LOOP
366 IF (words[j] != '') THEN
367 w := getorcreate_word_id(words[j]);
372 words := regexp_split_to_array(src, E'[,;()]');
373 IF array_upper(words, 1) != 1 THEN
374 FOR j IN 1..array_upper(words, 1) LOOP
375 s := make_standard_name(words[j]);
377 w := getorcreate_word_id(s);
382 s := regexp_replace(src, '市$', '');
384 s := make_standard_name(s);
386 w := getorcreate_name_id(s, src);