git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 3 Apr 2013 20:40:33 +0000 (22:40 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 3 Apr 2013 20:40:33 +0000 (22:40 +0200)
1  2 
settings/settings.php
utils/setup.php
utils/update.php
website/search.php

diff --combined settings/settings.php
index 915952727ccc098fa3ba83295def733efbc39ac4,771cf80fa6e5f723f340a29f0c3ce2b1df335845..159c24ac27544f2c86e289f3b07801c53384b8e3
@@@ -4,7 -4,7 +4,7 @@@
  
        // General settings
        @define('CONST_Debug', false);
-       @define('CONST_Database_DSN', 'pgsql://@/nominatim');
+       @define('CONST_Database_DSN', 'pgsql://@/nominatim'); // <driver>://<username>:<password>@<host>:<port>/<database>
        @define('CONST_Max_Word_Frequency', '50000');
  
        // Software versions
        @define('CONST_Path_Postgresql_Postgis', CONST_Path_Postgresql_Contrib.'/postgis-'.CONST_Postgis_Version);
        @define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql');
        @define('CONST_Osmosis_Binary', '/usr/bin/osmosis');
+       // Replication settings
+       @define('CONST_Replication_Url', 'http://planet.openstreetmap.org/replication/minute');
+       @define('CONST_Replication_MaxInterval', '3600');
+       @define('CONST_Replication_Update_Interval', '60');  // How often upstream publishes diffs
+       @define('CONST_Replication_Recheck_Interval', '60'); // How long to sleep if no update found yet
  
        // Connection buckets to rate limit people being nasty
        @define('CONST_ConnectionBucket_MemcacheServerAddress', false);
  
        // Website settings
        @define('CONST_NoAccessControl', true);
 -      @define('CONST_ClosedForIndexing', false);
 -      @define('CONST_ClosedForIndexingExceptionIPs', '');
        @define('CONST_BlockedIPs', '');
 +      @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks');
 +      @define('CONST_WhitelistedIPs', '');
 +      @define('CONST_BlockedUserAgents', '');
 +      @define('CONST_BlockReverseMaxLoad', 15);
        @define('CONST_BulkUserIPs', '');
  
 -      @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/');
 +      @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/');
        @define('CONST_Tile_Default', 'Mapnik');
  
        @define('CONST_Default_Language', 'xx');
diff --combined utils/setup.php
index 68e7a8b37d8c3f726418fa3cb6da90f651206fcb,3f2e6d05d1a083089c7f9b1e70ce1ef52b85e933..f8da1970e73eb2448b2f8969290609a8eaf2ea25
                        echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
                        fail("osm2pgsql not found in '$osm2pgsql'");
                }
 +              $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data';
                $osm2pgsql .= ' -lsc -O gazetteer --hstore';
 -              $osm2pgsql .= ' -C '.$iCacheMemory;
 +              $osm2pgsql .= ' -C 16000';
                $osm2pgsql .= ' -P '.$aDSNInfo['port'];
                $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file'];
                passthruCheckReturn($osm2pgsql);
                $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
                $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
                $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
 -              $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x";
 +              $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x";
                if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
  
                $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
                $bDidSomething = true;
                $oDB =& getDB();
  
-               if (!file_exists(CONST_Osmosis_Binary)) fail("please download osmosis");
+               if (!file_exists(CONST_Osmosis_Binary))
+               {
+                       echo "Please download osmosis.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
+                       fail("osmosis not found in '".CONST_Osmosis_Binary."'");
+               }
                if (file_exists(CONST_BasePath.'/settings/configuration.txt'))
                {
                        echo "settings/configuration.txt already exists\n";
                else
                {
                        passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_BasePath.'/settings');
-                       // server layout changed afer license change, fix path to minutely diffs
-                       passthru("sed -i 's:minute-replicate:replication/minute:' ".CONST_BasePath.'/settings/configuration.txt');
+                       // update osmosis configuration.txt with our settings
+                       passthru("sed -i 's!baseUrl=.*!baseUrl=".CONST_Replication_Url."!' ".CONST_BasePath.'/settings/configuration.txt');
+                       passthru("sed -i 's:maxInterval = .*:maxInterval = ".CONST_Replication_MaxInterval.":' ".CONST_BasePath.'/settings/configuration.txt');
                }
  
                // Find the last node in the DB
                $iLastNodeTimestamp = strtotime($aLastNodeDate[1]) - (3*60*60);
  
  
-               // Search for the correct state file - uses file timestamps
-               $sRepURL = 'http://planet.openstreetmap.org/replication/minute/';
-               $sRep = file_get_contents($sRepURL);
-               preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a> *(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
+               // Search for the correct state file - uses file timestamps so need to sort by date descending
+               $sRepURL = CONST_Replication_Url."/";
+               $sRep = file_get_contents($sRepURL."?C=M;O=D");
+               // download.geofabrik.de:    <a href="000/">000/</a></td><td align="right">26-Feb-2013 11:53  </td>
+               // planet.openstreetmap.org: <a href="273/">273/</a>                    22-Mar-2013 07:41    -
+               preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>.*(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
                if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
  
                $sRepURL .= $aRepMatch[1];
-               $sRep = file_get_contents($sRepURL);
-               preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a> *(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
+               $sRep = file_get_contents($sRepURL."?C=M;O=D");
+               preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>.*(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
                if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
  
                $sRepURL .= $aRepMatch[1];
-               $sRep = file_get_contents($sRepURL);
-               preg_match_all('#<a href="[0-9]{3}.state.txt">([0-9]{3}).state.txt</a> *(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
+               $sRep = file_get_contents($sRepURL."?C=M;O=D");
+               preg_match_all('#<a href="[0-9]{3}.state.txt">([0-9]{3}).state.txt</a>.*(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
diff --combined utils/update.php
index 0918730d01fb9e2783cab9baf90174d0e94e79e7,76d6f8582c2e1e9646845890b1d451b5ff8c6527..e8972f1d26f4d3cefa962667459966d38f548050
@@@ -46,6 -46,7 +46,6 @@@
                showUsage($aCMDOptions, true, 'Select either import of hourly or daily');
        }
  
 -      if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
        if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
  /*
        // Lock to prevent multiple copies running
  
        if ($aResult['index'])
        {
 +              if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
                passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
        }
  
                $sOsmosisCMD = CONST_Osmosis_Binary;
                $sOsmosisConfigDirectory = CONST_BasePath.'/settings';
                $sCMDDownload = $sOsmosisCMD.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile;
+               $sCMDCheckReplicationLag = $sOsmosisCMD.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory;
                $sCMDImport = CONST_Osm2pgsql_Binary.' -klas -C 2000 -O gazetteer -d '.$aDSNInfo['database'].' '.$sImportFile;
 -              $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances'];
 +              $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'];
                if (!$aResult['no-npi']) {
                        $sCMDIndex .= '-F ';
                }
  //                    {
                                if (!file_exists($sImportFile))
                                {
-                                       // Use osmosis to download the file
+                                       // First check if there are new updates published (except for minutelies - there's always new diffs to process)
+                                       if ( CONST_Replication_Update_Interval > 60 )
+                                       {
+                                               unset($aReplicationLag);
+                                               exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel); 
+                                               while ($iErrorLevel == 1 || $aReplicationLag[0] < 1)
+                                               {
+                                                       if ($iErrorLevel)
+                                                       {
+                                                               echo "Error: $iErrorLevel. ";
+                                                               echo "Re-trying: ".$sCMDCheckReplicationLag." in ".CONST_Replication_Recheck_Interval." secs\n";
+                                                       }
+                                                       else
+                                                       {
+                                                               echo ".";
+                                                       }
+                                                       sleep(CONST_Replication_Recheck_Interval);
+                                                       unset($aReplicationLag);
+                                                       exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel); 
+                                               }
+                                               // There are new replication files - use osmosis to download the file
+                                               echo "\nReplication Delay is ".$aReplicationLag[0]."\n";
+                                       }
                                        $fCMDStartTime = time();
                                        echo $sCMDDownload."\n";
                                        exec($sCMDDownload, $sJunk, $iErrorLevel);
                        $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
  
                        // Index file
 -                      $sThisIndexCmd = $sCMDIndex;
 +                      if (!isset($aResult['index-instances']))
 +                      {
 +                              if (getLoadAverage() < 15)
 +                                      $iIndexInstances = 2;
 +                              else
 +                                      $iIndexInstances = 1;
 +                      } else
 +                              $iIndexInstances = $aResult['index-instances'];
 +
 +                      $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances;
  
                        if (!$aResult['no-npi'])
                        {
                        echo "Completed for $sBatchEnd in ".round($fDuration/60,2)."\n";
                        if (!$aResult['import-osmosis-all']) exit;
  
-                       echo "Sleeping ".max(0,60-$fDuration)." seconds\n";
-                       sleep(max(0,60-$fDuration));
+                       if ( CONST_Replication_Update_Interval > 60 )
+                       {
+                               $iSleep = round(CONST_Replication_Update_Interval*0.8);
+                       }
+                       else
+                       {
+                               $iSleep = max(0,CONST_Replication_Update_Interval-$fDuration);
+                       }
+                       echo "Sleeping $iSleep seconds\n";
+                       sleep($iSleep);
                }
  
        }
diff --combined website/search.php
index ed67b4196401f5fb03e831aa6c4cbe035a8c5e17,5667181bdfe5b7b111643925f53f69d350cf9e28..d0c45c70636b7822598600577bd73cea1a4314f4
@@@ -71,7 -71,6 +71,7 @@@
        if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true;
 +      if (isset($aLangPrefOrder['name:pl'])) $bReverseInPlan = true;
  
        $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]";
  
                                array('county', 9, 13),
                                array('state', 8, 8),
                                array('country', 4, 4),
-                               array('postalcode', 16, 25),
+                               array('postalcode', 5, 11),
                                );
        $aStructuredQuery = array();
        $sAllowedTypesSQLList = '';
                                                                else
                                                                        $sSQL .= " limit ".$iLimit;
  
 -                                                              if (CONST_Debug) { var_dump($sSQL); }
 +                                                              if (CONST_Debug) var_dump($sSQL);
 +                                                              $iStartTime = time();
                                                                $aViewBoxPlaceIDs = $oDB->getAll($sSQL);
                                                                if (PEAR::IsError($aViewBoxPlaceIDs))
                                                                {
                                                                        failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs);
                                                                }
 +                                                              if (time() - $iStartTime > 60) {
 +                                                                      file_put_contents(CONST_BasePath.'/log/long_queries.log', date('Y-m-d H:i:s', $iStartTime).' '.$sSQL."\n", FILE_APPEND);
 +                                                              }
 +
  //var_dump($aViewBoxPlaceIDs);
                                                                // Did we have an viewbox matches?
                                                                $aPlaceIDs = array();