From cbaabe7c24f5d8b593148d90573909705ac15a13 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann
Date: Sat, 28 Oct 2017 18:34:22 +0200
Subject: [PATCH] add function to recalculate counts for full-word search term

---
 Reviewer notes (this section is not part of the commit message):

 * sql/words_from_search_name.sql builds a helper table word_frequencies
   holding, for every id found in search_name.name_vector, the number of
   search_name rows that contain it. It indexes the helper table by id,
   copies the count into word.search_name_count for rows whose word_token
   starts with a space and whose word_id matches, and finally drops the
   helper table again.
 * The leading-space filter (word_token like ' %') is presumably how
   full-word tokens are marked, going by the option description added in
   utils/update.php -- confirm against the tokenizer.
 * Only words that still occur in search_name are updated; rows of word
   without a match in word_frequencies keep their previous
   search_name_count.
 * utils/update.php gains the boolean option --recompute-word-counts. When
   set, it reads the SQL file from CONST_BasePath and hands the text to
   runSQLScript(). The result of file_get_contents() is not checked before
   it is passed on.
 * NOTE(review): indentation and the position of blank context lines inside
   the hunks were reconstructed from the hunk line counts; confirm against
   the tree before applying.

 sql/words_from_search_name.sql | 11 +++++++++++
 utils/update.php               |  7 +++++++
 2 files changed, 18 insertions(+)
 create mode 100644 sql/words_from_search_name.sql

diff --git a/sql/words_from_search_name.sql b/sql/words_from_search_name.sql
new file mode 100644
index 00000000..b7727dc6
--- /dev/null
+++ b/sql/words_from_search_name.sql
@@ -0,0 +1,11 @@
+DROP TABLE IF EXISTS word_frequencies;
+CREATE TABLE word_frequencies AS
+ SELECT unnest(name_vector) as id, count(*) FROM search_name GROUP BY id;
+
+CREATE INDEX idx_word_frequencies ON word_frequencies(id);
+
+UPDATE word SET search_name_count = count
+  FROM word_frequencies
+ WHERE word_token like ' %' and word_id = id;
+
+DROP TABLE word_frequencies;
diff --git a/utils/update.php b/utils/update.php
index 5a5d41ff..c5b5e4de 100755
--- a/utils/update.php
+++ b/utils/update.php
@@ -33,6 +33,7 @@ $aCMDOptions
    array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
 
    array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
+   array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
    array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
   );
 getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
@@ -237,6 +238,12 @@ if ($aResult['deduplicate']) {
     }
 }
 
+if ($aResult['recompute-word-counts']) {
+    info('Recompute frequency of full-word search terms');
+    $sTemplate = file_get_contents(CONST_BasePath.'/sql/words_from_search_name.sql');
+    runSQLScript($sTemplate, true, true);
+}
+
 if ($aResult['index']) {
     passthru(CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
 }
-- 
2.39.5