From 0ef64258470a5665b20122a75cbac1bb8cc7121c Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 5 Jan 2020 15:38:20 +0100 Subject: [PATCH 1/1] move SQL functions for normalisation in separate file --- lib/setup/SetupClass.php | 2 + sql/functions.sql | 374 ---------------------------- sql/functions/normalization.sql | 423 ++++++++++++++++++++++++++++++++ 3 files changed, 425 insertions(+), 374 deletions(-) create mode 100644 sql/functions/normalization.sql diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php index 38f361e7..92d6347b 100755 --- a/lib/setup/SetupClass.php +++ b/lib/setup/SetupClass.php @@ -648,7 +648,9 @@ class SetupFunctions private function createSqlFunctions() { + $sBasePath = CONST_BasePath.'/sql/functions/'; $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql'); + $sTemplate .= file_get_contents($sBasePath.'normalization.sql'); $sTemplate = str_replace('{modulepath}', $this->sModulePath, $sTemplate); if ($this->bEnableDiffUpdates) { $sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate); diff --git a/sql/functions.sql b/sql/functions.sql index ad2007cb..908236b1 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -22,215 +22,6 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; -CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text - AS '{modulepath}/nominatim.so', 'transliteration' -LANGUAGE c IMMUTABLE STRICT; - -CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text - AS '{modulepath}/nominatim.so', 'gettokenstring' -LANGUAGE c IMMUTABLE STRICT; - -CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT - AS $$ -DECLARE - o TEXT; -BEGIN - o := public.gettokenstring(public.transliteration(name)); - RETURN trim(substr(o,1,length(o))); -END; -$$ -LANGUAGE 'plpgsql' IMMUTABLE; - --- returns NULL if the word is too common -CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; - count INTEGER; -BEGIN - lookup_token := trim(lookup_word); - SELECT min(word_id), max(search_name_count) FROM word WHERE word_token = lookup_token and class is null and type is null into return_word_id, count; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0); - ELSE - IF count > get_maxwordfreq() THEN - return_word_id := NULL; - END IF; - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class='place' and type='house' into return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, 'place', 'house', null, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - lookup_word TEXT; - return_word_id INTEGER; -BEGIN - lookup_word := upper(trim(postcode)); - lookup_token := ' ' || make_standard_name(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class='place' and type='postcode' into return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word, 'place', 'postcode', null, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT, lookup_country_code varchar(2)) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and country_code=lookup_country_code into return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, lookup_country_code, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type into return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text, op text) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type and operator = op into return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0, op); - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - nospace_lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class is null and type is null into return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, src_word, null, null, null, 0); --- nospace_lookup_token := replace(replace(lookup_token, '-',''), ' ',''); --- IF ' '||nospace_lookup_token != lookup_token THEN --- INSERT INTO word VALUES (return_word_id, '-'||nospace_lookup_token, null, src_word, null, null, null, 0, null); --- END IF; - END IF; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE -BEGIN - RETURN getorcreate_name_id(lookup_word, ''); -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION get_word_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class is null and type is null into return_word_id; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql IMMUTABLE; - -CREATE OR REPLACE FUNCTION get_name_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word WHERE word_token = lookup_token and class is null and type is null into return_word_id; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql IMMUTABLE; - -CREATE OR REPLACE FUNCTION get_name_ids(lookup_word TEXT) - RETURNS INTEGER[] - AS $$ -DECLARE - lookup_token TEXT; - return_word_ids INTEGER[]; -BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT array_agg(word_id) FROM word WHERE word_token = lookup_token and class is null and type is null into return_word_ids; - RETURN return_word_ids; -END; -$$ -LANGUAGE plpgsql IMMUTABLE; CREATE OR REPLACE FUNCTION array_merge(a INTEGER[], b INTEGER[]) RETURNS INTEGER[] @@ -371,171 +162,6 @@ $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION create_country(src HSTORE, lookup_country_code varchar(2)) RETURNS VOID - AS $$ -DECLARE - s TEXT; - w INTEGER; - words TEXT[]; - item RECORD; - j INTEGER; -BEGIN - FOR item IN SELECT (each(src)).* LOOP - - s := make_standard_name(item.value); - w := getorcreate_country(s, lookup_country_code); - - words := regexp_split_to_array(item.value, E'[,;()]'); - IF array_upper(words, 1) != 1 THEN - FOR j IN 1..array_upper(words, 1) LOOP - s := make_standard_name(words[j]); - IF s != '' THEN - w := getorcreate_country(s, lookup_country_code); - END IF; - END LOOP; - END IF; - END LOOP; -END; -$$ -LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION make_keywords(src HSTORE) RETURNS INTEGER[] - AS $$ -DECLARE - result INTEGER[]; - s TEXT; - w INTEGER; - words TEXT[]; - item RECORD; - j INTEGER; -BEGIN - result := '{}'::INTEGER[]; - - FOR item IN SELECT (each(src)).* LOOP - - s := make_standard_name(item.value); - - w := getorcreate_name_id(s, item.value); - - IF not(ARRAY[w] <@ result) THEN - result := result || w; - END IF; - - w := getorcreate_word_id(s); - - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - - words := string_to_array(s, ' '); - IF array_upper(words, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(words, 1) LOOP - IF (words[j] != '') THEN - w = getorcreate_word_id(words[j]); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END LOOP; - END IF; - - words := regexp_split_to_array(item.value, E'[,;()]'); - IF array_upper(words, 1) != 1 THEN - FOR j IN 1..array_upper(words, 1) LOOP - s := make_standard_name(words[j]); - IF s != '' THEN - w := getorcreate_word_id(s); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END LOOP; - END IF; - - s := regexp_replace(item.value, '市$', ''); - IF s != item.value THEN - s := make_standard_name(s); - IF s != '' THEN - w := getorcreate_name_id(s, item.value); - IF NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END IF; - - END LOOP; - - RETURN result; -END; -$$ -LANGUAGE plpgsql IMMUTABLE; - -CREATE OR REPLACE FUNCTION make_keywords(src TEXT) RETURNS INTEGER[] - AS $$ -DECLARE - result INTEGER[]; - s TEXT; - w INTEGER; - words TEXT[]; - i INTEGER; - j INTEGER; -BEGIN - result := '{}'::INTEGER[]; - - s := make_standard_name(src); - w := getorcreate_name_id(s, src); - - IF NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - - w := getorcreate_word_id(s); - - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - - words := string_to_array(s, ' '); - IF array_upper(words, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(words, 1) LOOP - IF (words[j] != '') THEN - w = getorcreate_word_id(words[j]); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END LOOP; - END IF; - - words := regexp_split_to_array(src, E'[,;()]'); - IF array_upper(words, 1) != 1 THEN - FOR j IN 1..array_upper(words, 1) LOOP - s := make_standard_name(words[j]); - IF s != '' THEN - w := getorcreate_word_id(s); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END LOOP; - END IF; - - s := regexp_replace(src, '市$', ''); - IF s != src THEN - s := make_standard_name(s); - IF s != '' THEN - w := getorcreate_name_id(s, src); - IF NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - END IF; - END IF; - - RETURN result; -END; -$$ -LANGUAGE plpgsql IMMUTABLE; - CREATE OR REPLACE FUNCTION get_country_code(place geometry) RETURNS TEXT AS $$ DECLARE diff --git a/sql/functions/normalization.sql b/sql/functions/normalization.sql new file mode 100644 index 00000000..53a81588 --- /dev/null +++ b/sql/functions/normalization.sql @@ -0,0 +1,423 @@ +-- Functions for term normalisation and access to the 'word' table. + +CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text + AS '{modulepath}/nominatim.so', 'transliteration' +LANGUAGE c IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text + AS '{modulepath}/nominatim.so', 'gettokenstring' +LANGUAGE c IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT + AS $$ +DECLARE + o TEXT; +BEGIN + o := public.gettokenstring(public.transliteration(name)); + RETURN trim(substr(o,1,length(o))); +END; +$$ +LANGUAGE plpgsql IMMUTABLE; + +-- returns NULL if the word is too common +CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; + count INTEGER; +BEGIN + lookup_token := trim(lookup_word); + SELECT min(word_id), max(search_name_count) FROM word + WHERE word_token = lookup_token and class is null and type is null + INTO return_word_id, count; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0); + ELSE + IF count > get_maxwordfreq() THEN + return_word_id := NULL; + END IF; + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' ' || trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and class='place' and type='house' + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, null, + 'place', 'house', null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + lookup_word TEXT; + return_word_id INTEGER; +BEGIN + lookup_word := upper(trim(postcode)); + lookup_token := ' ' || make_standard_name(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and class='place' and type='postcode' + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word, + 'place', 'postcode', null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT, + lookup_country_code varchar(2)) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and country_code=lookup_country_code + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, null, + null, null, lookup_country_code, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, + lookup_class text, lookup_type text) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and word = normalized_word + and class = lookup_class and type = lookup_type + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, + lookup_class, lookup_type, null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, + normalized_word TEXT, + lookup_class text, + lookup_type text, + op text) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and word = normalized_word + and class = lookup_class and type = lookup_type and operator = op + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, + lookup_class, lookup_type, null, 0, op); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + nospace_lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and class is null and type is null + INTO return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, src_word, + null, null, null, 0); + END IF; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE +BEGIN + RETURN getorcreate_name_id(lookup_word, ''); +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION get_word_id(lookup_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and class is null and type is null + INTO return_word_id; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql STABLE; + + +CREATE OR REPLACE FUNCTION get_name_id(lookup_word TEXT) + RETURNS INTEGER + AS $$ +DECLARE + lookup_token TEXT; + return_word_id INTEGER; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT min(word_id) FROM word + WHERE word_token = lookup_token and class is null and type is null + INTO return_word_id; + RETURN return_word_id; +END; +$$ +LANGUAGE plpgsql STABLE; + + +CREATE OR REPLACE FUNCTION get_name_ids(lookup_word TEXT) + RETURNS INTEGER[] + AS $$ +DECLARE + lookup_token TEXT; + return_word_ids INTEGER[]; +BEGIN + lookup_token := ' '||trim(lookup_word); + SELECT array_agg(word_id) FROM word + WHERE word_token = lookup_token and class is null and type is null + INTO return_word_ids; + RETURN return_word_ids; +END; +$$ +LANGUAGE plpgsql STABLE; + + +CREATE OR REPLACE FUNCTION create_country(src HSTORE, country_code varchar(2)) + RETURNS VOID + AS $$ +DECLARE + s TEXT; + w INTEGER; + words TEXT[]; + item RECORD; + j INTEGER; +BEGIN + FOR item IN SELECT (each(src)).* LOOP + + s := make_standard_name(item.value); + w := getorcreate_country(s, country_code); + + words := regexp_split_to_array(item.value, E'[,;()]'); + IF array_upper(words, 1) != 1 THEN + FOR j IN 1..array_upper(words, 1) LOOP + s := make_standard_name(words[j]); + IF s != '' THEN + w := getorcreate_country(s, country_code); + END IF; + END LOOP; + END IF; + END LOOP; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION make_keywords(src HSTORE) + RETURNS INTEGER[] + AS $$ +DECLARE + result INTEGER[]; + s TEXT; + w INTEGER; + words TEXT[]; + item RECORD; + j INTEGER; +BEGIN + result := '{}'::INTEGER[]; + + FOR item IN SELECT (each(src)).* LOOP + + s := make_standard_name(item.value); + w := getorcreate_name_id(s, item.value); + + IF not(ARRAY[w] <@ result) THEN + result := result || w; + END IF; + + w := getorcreate_word_id(s); + + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + + words := string_to_array(s, ' '); + IF array_upper(words, 1) IS NOT NULL THEN + FOR j IN 1..array_upper(words, 1) LOOP + IF (words[j] != '') THEN + w = getorcreate_word_id(words[j]); + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END LOOP; + END IF; + + words := regexp_split_to_array(item.value, E'[,;()]'); + IF array_upper(words, 1) != 1 THEN + FOR j IN 1..array_upper(words, 1) LOOP + s := make_standard_name(words[j]); + IF s != '' THEN + w := getorcreate_word_id(s); + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END LOOP; + END IF; + + s := regexp_replace(item.value, '市$', ''); + IF s != item.value THEN + s := make_standard_name(s); + IF s != '' THEN + w := getorcreate_name_id(s, item.value); + IF NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END IF; + + END LOOP; + + RETURN result; +END; +$$ +LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION make_keywords(src TEXT) + RETURNS INTEGER[] + AS $$ +DECLARE + result INTEGER[]; + s TEXT; + w INTEGER; + words TEXT[]; + i INTEGER; + j INTEGER; +BEGIN + result := '{}'::INTEGER[]; + + s := make_standard_name(src); + w := getorcreate_name_id(s, src); + + IF NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + + w := getorcreate_word_id(s); + + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + + words := string_to_array(s, ' '); + IF array_upper(words, 1) IS NOT NULL THEN + FOR j IN 1..array_upper(words, 1) LOOP + IF (words[j] != '') THEN + w = getorcreate_word_id(words[j]); + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END LOOP; + END IF; + + words := regexp_split_to_array(src, E'[,;()]'); + IF array_upper(words, 1) != 1 THEN + FOR j IN 1..array_upper(words, 1) LOOP + s := make_standard_name(words[j]); + IF s != '' THEN + w := getorcreate_word_id(s); + IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END LOOP; + END IF; + + s := regexp_replace(src, '市$', ''); + IF s != src THEN + s := make_standard_name(s); + IF s != '' THEN + w := getorcreate_name_id(s, src); + IF NOT (ARRAY[w] <@ result) THEN + result := result || w; + END IF; + END IF; + END IF; + + RETURN result; +END; +$$ +LANGUAGE plpgsql; -- 2.39.5