X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/91395fb9ab8e5ce969c9cd894454948423b9c0fe..a83424fee50c6837bcadad44ae374de98e4bb1f1:/utils/update.php diff --git a/utils/update.php b/utils/update.php index efdf6525..475e5836 100755 --- a/utils/update.php +++ b/utils/update.php @@ -156,22 +156,36 @@ if ($bHaveDiff) { } if ($aResult['deduplicate']) { - // - if (getPostgresVersion() < 9.3) { + $oDB =& getDB(); + + if (getPostgresVersion($oDB) < 9.3) { fail("ERROR: deduplicate is only currently supported in postgresql 9.3"); } - $oDB =& getDB(); $sSQL = 'select partition from country_name order by country_code'; $aPartitions = chksql($oDB->getCol($sSQL)); $aPartitions[] = 0; - $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' ' and class is null and type is null and country_code is null group by word_token having count(*) > 1 order by word_token"; + // we don't care about empty search_name_* artitions, they can't contain mentions of duplicates + foreach ($aPartitions as $i => $sPartition) { + $sSQL = "select count(*) from search_name_".$sPartition; + $nEntries = chksql($oDB->getOne($sSQL)); + if ($nEntries == 0) { + unset($aPartitions[$i]); + } + } + + $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '"; + $sSQL .= " and class is null and type is null and country_code is null"; + $sSQL .= " group by word_token having count(*) > 1 order by word_token"; $aDuplicateTokens = chksql($oDB->getAll($sSQL)); foreach ($aDuplicateTokens as $aToken) { if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue; echo "Deduping ".$aToken['word_token']."\n"; - $sSQL = "select word_id,(select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num from word where word_token = '".$aToken['word_token']."' and class is null and type is null and country_code is null order by num desc"; + $sSQL = "select word_id,"; + $sSQL .= " (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num"; + $sSQL .= " from word where word_token = '".$aToken['word_token']; + $sSQL .= "' and class is null and type is null and country_code is null order by num desc"; $aTokenSet = chksql($oDB->getAll($sSQL)); $aKeep = array_shift($aTokenSet);