summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
e3fb706)
Compare the normalized terms imported with the special
terms script with the normalized version of the query string.
Disregard them if they cannot be found. This avoids a significant
number of mismatches due to transliteration issues.
The match will only be done when a normalized word has been set
making this change backwards compatible with older databases.
return $aSearchResults;
}
return $aSearchResults;
}
- public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
+ public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
{
/*
Calculate all searches using aValidTokens i.e.
{
/*
Calculate all searches using aValidTokens i.e.
*/
}
} elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
*/
}
} elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
- if ($aSearch['sClass'] === '') {
- $aSearch['sOperator'] = $aSearchTerm['operator'];
+ // require a normalized exact match of the term
+ // if we have the normalizer version of the query
+ // available
+ if ($aSearch['sClass'] === ''
+ && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
$aSearch['sClass'] = $aSearchTerm['class'];
$aSearch['sType'] = $aSearchTerm['type'];
$aSearch['sClass'] = $aSearchTerm['class'];
$aSearch['sType'] = $aSearchTerm['type'];
- if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
- else $aSearch['sOperator'] = 'near'; // near = in for the moment
- if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
+ if ($aSearchTerm['operator'] == '') {
+ $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near';
+ $aSearch['iSearchRank'] += 2;
+ } else {
+ $aSearch['sOperator'] = 'near'; // near = in for the moment
+ }
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
{
if (!$this->sQuery && !$this->aStructuredQuery) return array();
{
if (!$this->sQuery && !$this->aStructuredQuery) return array();
+ $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+ if ($oNormalizer !== null) {
+ $sNormQuery = $oNormalizer->transliterate($this->sQuery);
+ } else {
+ $sNormQuery = null;
+ }
+
$sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
$sCountryCodesSQL = false;
if ($this->aCountryCodes) {
$sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
$sCountryCodesSQL = false;
if ($this->aCountryCodes) {
// array with: placeid => -1 | tiger-housenumber
$aResultPlaceIDs = array();
// array with: placeid => -1 | tiger-housenumber
$aResultPlaceIDs = array();
- $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
+ $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
if ($this->bReverseInPlan) {
// Reverse phrase array and also reverse the order of the wordsets in
if ($this->bReverseInPlan) {
// Reverse phrase array and also reverse the order of the wordsets in
$aFinalPhrase = end($aPhrases);
$aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
}
$aFinalPhrase = end($aPhrases);
$aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
}
- $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
+ $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
foreach ($aGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
// codes, to restrict import to a subset of languages.
// Currently only affects the import of country names and special phrases.
@define('CONST_Languages', false);
// codes, to restrict import to a subset of languages.
// Currently only affects the import of country names and special phrases.
@define('CONST_Languages', false);
+// Rules for normalizing terms for comparison before doing comparisons.
+// The default is to remove accents and punctuation and to lower-case the
+// term. Spaces are kept but collapsed to one standard space.
+@define('CONST_Term_Normalization_Rules', ":: NFD (); [:Nonspacing Mark:] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
// Set to false to avoid importing extra postcodes for the US.
@define('CONST_Use_Extra_US_Postcodes', true);
// Set to false to avoid importing extra postcodes for the US.
@define('CONST_Use_Extra_US_Postcodes', true);
include(CONST_InstallPath.'/settings/phrase_settings.php');
if ($aCMDResult['wiki-import']) {
include(CONST_InstallPath.'/settings/phrase_settings.php');
if ($aCMDResult['wiki-import']) {
- $oNormalizer = Transliterator::createFromRules(":: NFD (); [:Nonspacing Mark:] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
+ $oNormalizer = Transliterator::createFromRules(CONST_Term_Normalization_Rules);
$aPairs = array();
$sLanguageIn = CONST_Languages ? CONST_Languages :
$aPairs = array();
$sLanguageIn = CONST_Languages ? CONST_Languages :
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {
foreach ($aMatches as $aMatch) {
$sLabel = trim($aMatch[1]);
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {
foreach ($aMatches as $aMatch) {
$sLabel = trim($aMatch[1]);
- $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
+ if ($oNormalizer !== null) {
+ $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
+ } else {
+ $sTrans = null;
+ }
$sClass = trim($aMatch[2]);
$sType = trim($aMatch[3]);
// hack around a bug where building=yes was imported with
$sClass = trim($aMatch[2]);
$sType = trim($aMatch[3]);
// hack around a bug where building=yes was imported with