}
if ($aResult['deduplicate']) {
- //
- if (getPostgresVersion() < 9.3) {
+ $oDB =& getDB();
+
+ if (getPostgresVersion($oDB) < 9.3) {
fail("ERROR: deduplicate is only currently supported in postgresql 9.3");
}
- $oDB =& getDB();
$sSQL = 'select partition from country_name order by country_code';
$aPartitions = chksql($oDB->getCol($sSQL));
$aPartitions[] = 0;
- $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' ' and class is null and type is null and country_code is null group by word_token having count(*) > 1 order by word_token";
+ // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates
+ foreach ($aPartitions as $i => $sPartition) {
+ $sSQL = "select count(*) from search_name_".$sPartition;
+ $nEntries = chksql($oDB->getOne($sSQL));
+ if ($nEntries == 0) {
+ unset($aPartitions[$i]);
+ }
+ }
+
+ $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '";
+ $sSQL .= " and class is null and type is null and country_code is null";
+ $sSQL .= " group by word_token having count(*) > 1 order by word_token";
$aDuplicateTokens = chksql($oDB->getAll($sSQL));
foreach ($aDuplicateTokens as $aToken) {
if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
echo "Deduping ".$aToken['word_token']."\n";
- $sSQL = "select word_id,(select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num from word where word_token = '".$aToken['word_token']."' and class is null and type is null and country_code is null order by num desc";
+ $sSQL = "select word_id,";
+ $sSQL .= " (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num";
+ $sSQL .= " from word where word_token = '".$aToken['word_token'];
+ $sSQL .= "' and class is null and type is null and country_code is null order by num desc";
$aTokenSet = chksql($oDB->getAll($sSQL));
$aKeep = array_shift($aTokenSet);
// First check if there are new updates published (except for minutelies - there's always new diffs to process)
if (CONST_Replication_Update_Interval > 60) {
unset($aReplicationLag);
- exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
+ exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
while ($iErrorLevel > 0 || $aReplicationLag[0] < 1) {
if ($iErrorLevel) {
echo "Error: $iErrorLevel. ";
}
sleep(CONST_Replication_Recheck_Interval);
unset($aReplicationLag);
- exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
+ exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
}
// There are new replication files - use osmosis to download the file
echo "\n".date('Y-m-d H:i:s')." Replication Delay is ".$aReplicationLag[0]."\n";
}
}
+
function getosmosistimestamp($sOsmosisConfigDirectory)
{
$sStateFile = file_get_contents($sOsmosisConfigDirectory.'/state.txt');