From 3eb4d8805700ba12bd601e552c3bc48064423083 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 28 Apr 2021 10:59:07 +0200 Subject: [PATCH] boilerplate for PHP code of tokenizer This adds an installation step for PHP code for the tokenizer. The PHP code is split into two parts. The updateable code is found in lib-php. The tokenizer installs an additional script in the project directory which then includes the code from lib-php and defines all settings that are static to the database. The website code then always includes the PHP from the project directory. --- lib-php/Geocode.php | 1 + lib-php/admin/query.php | 5 ++- lib-php/admin/warm.php | 5 ++- lib-php/tokenizer/legacy_tokenizer.php | 1 + nominatim/tokenizer/factory.py | 3 +- nominatim/tokenizer/legacy_tokenizer.py | 22 +++++++++++-- nominatim/tools/refresh.py | 6 ++-- test/bdd/steps/nominatim_environment.py | 33 +++++++++---------- test/python/dummy_tokenizer.py | 2 +- test/python/test_tokenizer_legacy.py | 1 + .../test_tools_refresh_setup_website.py | 1 + 11 files changed, 48 insertions(+), 32 deletions(-) create mode 100644 lib-php/tokenizer/legacy_tokenizer.php diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index 6cec6a85..d9c1b3c0 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -8,6 +8,7 @@ require_once(CONST_LibDir.'/ReverseGeocode.php'); require_once(CONST_LibDir.'/SearchDescription.php'); require_once(CONST_LibDir.'/SearchContext.php'); require_once(CONST_LibDir.'/TokenList.php'); +require_once(CONST_TokenizerDir.'/tokenizer.php'); class Geocode { diff --git a/lib-php/admin/query.php b/lib-php/admin/query.php index 268b87cc..21121fbd 100644 --- a/lib-php/admin/query.php +++ b/lib-php/admin/query.php @@ -2,7 +2,6 @@ @define('CONST_LibDir', dirname(dirname(__FILE__))); require_once(CONST_LibDir.'/init-cmd.php'); -require_once(CONST_LibDir.'/Geocode.php'); require_once(CONST_LibDir.'/ParameterParser.php'); ini_set('memory_limit', '800M'); @@ -41,16 +40,16 @@ 
loadSettings($aCMDResult['project-dir'] ?? getcwd()); @define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false)); @define('CONST_Log_DB', getSettingBool('LOG_DB')); @define('CONST_Log_File', getSetting('LOG_FILE', false)); -@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY')); @define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL')); @define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT')); @define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES')); @define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE')); @define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD')); -@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION')); @define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA')); @define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false)); +@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer'); +require_once(CONST_LibDir.'/Geocode.php'); $oDB = new Nominatim\DB; $oDB->connect(); diff --git a/lib-php/admin/warm.php b/lib-php/admin/warm.php index d7950af9..d6aa3d9b 100644 --- a/lib-php/admin/warm.php +++ b/lib-php/admin/warm.php @@ -3,7 +3,6 @@ require_once(CONST_LibDir.'/init-cmd.php'); require_once(CONST_LibDir.'/log.php'); -require_once(CONST_LibDir.'/Geocode.php'); require_once(CONST_LibDir.'/PlaceLookup.php'); require_once(CONST_LibDir.'/ReverseGeocode.php'); @@ -26,16 +25,16 @@ loadSettings($aCMDResult['project-dir'] ?? 
getcwd()); @define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false)); @define('CONST_Log_DB', getSettingBool('LOG_DB')); @define('CONST_Log_File', getSetting('LOG_FILE', false)); -@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY')); @define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL')); @define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT')); @define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES')); @define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE')); @define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD')); -@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION')); @define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA')); @define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false)); +@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer'); +require_once(CONST_LibDir.'/Geocode.php'); $oDB = new Nominatim\DB(); $oDB->connect(); diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php new file mode 100644 index 00000000..b3d9bbc7 --- /dev/null +++ b/lib-php/tokenizer/legacy_tokenizer.php @@ -0,0 +1 @@ +