]> git.openstreetmap.org Git - nominatim.git/blobdiff - utils/importWikipedia.php
small fixes on setup.php and a bring update.php to work
[nominatim.git] / utils / importWikipedia.php
index a4b8cb77933d8cab80451928d5648fbcbf3e164e..6e42975481cbbd35b8fafb92ddbe7b4f9652a045 100755 (executable)
@@ -7,7 +7,7 @@ ini_set('memory_limit', '800M');
 
 $aCMDOptions
  = array(
 
 $aCMDOptions
  = array(
-    "Create and setup nominatim search system",
+    'Create and setup nominatim search system',
     array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
     array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
     array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
     array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
     array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
     array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
@@ -37,7 +37,7 @@ $sTestPageText = <<<EOD
 | coasters = 12
 | water_rides = 2
 | owner = [[Six Flags]]
 | coasters = 12
 | water_rides = 2
 | owner = [[Six Flags]]
-| general_manager = 
+| general_manager =
 | homepage = [http://www.sixflags.com/parks/greatadventure/ Six Flags Great Adventure]
 }}
 EOD;
 | homepage = [http://www.sixflags.com/parks/greatadventure/ Six Flags Great Adventure]
 }}
 EOD;
@@ -90,12 +90,14 @@ EOD;
     $oDB->query($sSQL);
 }
 
     $oDB->query($sSQL);
 }
 
+
 function degreesAndMinutesToDecimal($iDegrees, $iMinutes = 0, $fSeconds = 0, $sNSEW = 'N')
 {
     $sNSEW = strtoupper($sNSEW);
     return ($sNSEW == 'S' || $sNSEW == 'W'?-1:1) * ((float)$iDegrees + (float)$iMinutes/60 + (float)$fSeconds/3600);
 }
 
 function degreesAndMinutesToDecimal($iDegrees, $iMinutes = 0, $fSeconds = 0, $sNSEW = 'N')
 {
     $sNSEW = strtoupper($sNSEW);
     return ($sNSEW == 'S' || $sNSEW == 'W'?-1:1) * ((float)$iDegrees + (float)$iMinutes/60 + (float)$fSeconds/3600);
 }
 
+
 function _parseWikipediaContent($sPageText)
 {
     $sPageText = str_replace("\n", ' ', $sPageText);
 function _parseWikipediaContent($sPageText)
 {
     $sPageText = str_replace("\n", ' ', $sPageText);
@@ -113,83 +115,83 @@ function _parseWikipediaContent($sPageText)
     $aState = array('body');
     foreach ($aPageText as $i => $sPart) {
         switch ($sPart) {
     $aState = array('body');
     foreach ($aPageText as $i => $sPart) {
         switch ($sPart) {
-        case '{{':
-            array_unshift($aTemplateStack, array('', array()));
-            array_unshift($aState, 'template');
-            break;
-        case '}}':
-            if ($aState[0] == 'template' || $aState[0] == 'templateparam') {
-                $aTemplate = array_shift($aTemplateStack);
-                array_shift($aState);
-
-                $aTemplates[] = $aTemplate;
-            }
-            break;
-        case '[[':
-            $sLinkPage = '';
-            $sLinkSyn = '';
-            array_unshift($aState, 'link');
-            break;
-        case ']]':
-            if ($aState[0] == 'link' || $aState[0] == 'linksynonim') {
-                if (!$sLinkSyn) $sLinkSyn = $sLinkPage;
-                if (substr($sLinkPage, 0, 6) == 'Image:') $sLinkSyn = substr($sLinkPage, 6);
-
-                $aLinks[] = array($sLinkPage, $sLinkSyn);
+            case '{{':
+                array_unshift($aTemplateStack, array('', array()));
+                array_unshift($aState, 'template');
+                break;
+            case '}}':
+                if ($aState[0] == 'template' || $aState[0] == 'templateparam') {
+                    $aTemplate = array_shift($aTemplateStack);
+                    array_shift($aState);
 
 
-                array_shift($aState);
-                switch ($aState[0]) {
-                case 'template':
-                    $aTemplateStack[0][0] .= trim($sPart);
-                    break;
-                case 'templateparam':
-                    $aTemplateStack[0][1][0] .= $sLinkSyn;
-                    break;
-                case 'link':
-                    $sLinkPage .= trim($sPart);
-                    break;
-                case 'linksynonim':
-                    $sLinkSyn .= $sPart;
-                    break;
-                case 'body':
-                    $sPageBody .= $sLinkSyn;
-                    break;
-                default:
-                    var_dump($aState, $sPageName, $aTemplateStack, $sPart, $aPageText);
-                    fail('unknown state');
+                    $aTemplates[] = $aTemplate;
                 }
                 }
-            }
-            break;
-        case '|':
-            if ($aState[0] == 'template' || $aState[0] == 'templateparam') {
-                // Create a new template parameter
-                $aState[0] = 'templateparam';
-                array_unshift($aTemplateStack[0][1], '');
-            }
-            if ($aState[0] == 'link') $aState[0] = 'linksynonim';
-            break;
-        default:
-            switch ($aState[0]) {
-            case 'template':
-                $aTemplateStack[0][0] .= trim($sPart);
-                break;
-            case 'templateparam':
-                $aTemplateStack[0][1][0] .= $sPart;
                 break;
                 break;
-            case 'link':
-                $sLinkPage .= trim($sPart);
+            case '[[':
+                $sLinkPage = '';
+                $sLinkSyn = '';
+                array_unshift($aState, 'link');
                 break;
                 break;
-            case 'linksynonim':
-                $sLinkSyn .= $sPart;
+            case ']]':
+                if ($aState[0] == 'link' || $aState[0] == 'linksynonim') {
+                    if (!$sLinkSyn) $sLinkSyn = $sLinkPage;
+                    if (substr($sLinkPage, 0, 6) == 'Image:') $sLinkSyn = substr($sLinkPage, 6);
+
+                    $aLinks[] = array($sLinkPage, $sLinkSyn);
+
+                    array_shift($aState);
+                    switch ($aState[0]) {
+                        case 'template':
+                            $aTemplateStack[0][0] .= trim($sPart);
+                            break;
+                        case 'templateparam':
+                            $aTemplateStack[0][1][0] .= $sLinkSyn;
+                            break;
+                        case 'link':
+                            $sLinkPage .= trim($sPart);
+                            break;
+                        case 'linksynonim':
+                            $sLinkSyn .= $sPart;
+                            break;
+                        case 'body':
+                            $sPageBody .= $sLinkSyn;
+                            break;
+                        default:
+                            var_dump($aState, $sPageName, $aTemplateStack, $sPart, $aPageText);
+                            fail('unknown state');
+                    }
+                }
                 break;
                 break;
-            case 'body':
-                $sPageBody .= $sPart;
+            case '|':
+                if ($aState[0] == 'template' || $aState[0] == 'templateparam') {
+                    // Create a new template parameter
+                    $aState[0] = 'templateparam';
+                    array_unshift($aTemplateStack[0][1], '');
+                }
+                if ($aState[0] == 'link') $aState[0] = 'linksynonim';
                 break;
             default:
                 break;
             default:
-                var_dump($aState, $aPageText);
-                fail('unknown state');
-            }
-            break;
+                switch ($aState[0]) {
+                    case 'template':
+                        $aTemplateStack[0][0] .= trim($sPart);
+                        break;
+                    case 'templateparam':
+                        $aTemplateStack[0][1][0] .= $sPart;
+                        break;
+                    case 'link':
+                        $sLinkPage .= trim($sPart);
+                        break;
+                    case 'linksynonim':
+                        $sLinkSyn .= $sPart;
+                        break;
+                    case 'body':
+                        $sPageBody .= $sPart;
+                        break;
+                    default:
+                        var_dump($aState, $aPageText);
+                        fail('unknown state');
+                }
+                break;
         }
     }
     return $aTemplates;
         }
     }
     return $aTemplates;
@@ -201,7 +203,7 @@ function _templatesToProperties($aTemplates)
     foreach ($aTemplates as $iTemplate => $aTemplate) {
         $aParams = array();
         foreach (array_reverse($aTemplate[1]) as $iParam => $sParam) {
     foreach ($aTemplates as $iTemplate => $aTemplate) {
         $aParams = array();
         foreach (array_reverse($aTemplate[1]) as $iParam => $sParam) {
-            if (($iPos = strpos($sParam, '=')) === FALSE) {
+            if (($iPos = strpos($sParam, '=')) === false) {
                 $aParams[] = trim($sParam);
             } else {
                 $aParams[trim(substr($sParam, 0, $iPos))] = trim(substr($sParam, $iPos+1));
                 $aParams[] = trim($sParam);
             } else {
                 $aParams[trim(substr($sParam, 0, $iPos))] = trim(substr($sParam, $iPos+1));
@@ -224,7 +226,7 @@ function _templatesToProperties($aTemplates)
         if (!isset($aPageProperties['sWebsite']) && isset($aParams['website']) && $aParams['website']) {
             if (preg_match('#^\\[?([^ \\]]+)[^\\]]*\\]?$#', $aParams['website'], $aMatch)) {
                 $aPageProperties['sWebsite'] = $aMatch[1];
         if (!isset($aPageProperties['sWebsite']) && isset($aParams['website']) && $aParams['website']) {
             if (preg_match('#^\\[?([^ \\]]+)[^\\]]*\\]?$#', $aParams['website'], $aMatch)) {
                 $aPageProperties['sWebsite'] = $aMatch[1];
-                if (strpos($aPageProperties['sWebsite'], ':/'.'/') === FALSE) {
+                if (strpos($aPageProperties['sWebsite'], ':/'.'/') === false) {
                     $aPageProperties['sWebsite'] = 'http:/'.'/'.$aPageProperties['sWebsite'];
                 }
             }
                     $aPageProperties['sWebsite'] = 'http:/'.'/'.$aPageProperties['sWebsite'];
                 }
             }
@@ -239,7 +241,7 @@ function _templatesToProperties($aTemplates)
         }
 
         // Assume the first template with lots of params is the type (fallback for infobox)
         }
 
         // Assume the first template with lots of params is the type (fallback for infobox)
-        if (!isset($aPageProperties['sPossibleInfoboxType']) && sizeof($aParams) > 10) {
+        if (!isset($aPageProperties['sPossibleInfoboxType']) && count($aParams) > 10) {
             $aPageProperties['sPossibleInfoboxType'] = trim($aTemplate[0]);
             // $aPageProperties['aInfoboxParams'] = $aParams;
         }
             $aPageProperties['sPossibleInfoboxType'] = trim($aTemplate[0]);
             // $aPageProperties['aInfoboxParams'] = $aParams;
         }
@@ -305,8 +307,14 @@ function _templatesToProperties($aTemplates)
 
 if (isset($aCMDResult['parse-wikipedia'])) {
     $oDB =& getDB();
 
 if (isset($aCMDResult['parse-wikipedia'])) {
     $oDB =& getDB();
-    $aArticleNames = $oDB->getCol('select page_title from content where page_namespace = 0 and page_id %10 = '.$aCMDResult['parse-wikipedia'].' and (page_content ilike \'%{{Coord%\' or (page_content ilike \'%lat%\' and page_content ilike \'%lon%\'))');
-//      $aArticleNames = $oDB->getCol($sSQL = 'select page_title from content where page_namespace = 0 and (page_content ilike \'%{{Coord%\' or (page_content ilike \'%lat%\' and page_content ilike \'%lon%\')) and page_title in (\'Virginia\')');
+    $sSQL = 'select page_title from content where page_namespace = 0 and page_id %10 = ';
+    $sSQL .= $aCMDResult['parse-wikipedia'];
+    $sSQL .= ' and (page_content ilike \'%{{Coord%\' or (page_content ilike \'%lat%\' and page_content ilike \'%lon%\'))';
+    $aArticleNames = $oDB->getCol($sSQL);
+    /* $aArticleNames = $oDB->getCol($sSQL = 'select page_title from content where page_namespace = 0
+        and (page_content ilike \'%{{Coord%\' or (page_content ilike \'%lat%\'
+        and page_content ilike \'%lon%\')) and page_title in (\'Virginia\')');
+     */
     foreach ($aArticleNames as $sArticleName) {
         $sPageText = $oDB->getOne('select page_content from content where page_namespace = 0 and page_title = \''.pg_escape_string($sArticleName).'\'');
         $aP = _templatesToProperties(_parseWikipediaContent($sPageText));
     foreach ($aArticleNames as $sArticleName) {
         $sPageText = $oDB->getOne('select page_content from content where page_namespace = 0 and page_title = \''.pg_escape_string($sArticleName).'\'');
         $aP = _templatesToProperties(_parseWikipediaContent($sPageText));
@@ -342,16 +350,18 @@ if (isset($aCMDResult['parse-wikipedia'])) {
     }
 }
 
     }
 }
 
+
 function nominatimXMLStart($hParser, $sName, $aAttr)
 {
 function nominatimXMLStart($hParser, $sName, $aAttr)
 {
-        global $aNominatRecords;
-        switch ($sName) {
+    global $aNominatRecords;
+    switch ($sName) {
         case 'PLACE':
         case 'PLACE':
-                $aNominatRecords[] = $aAttr;
-                break;
-        }
+            $aNominatRecords[] = $aAttr;
+            break;
+    }
 }
 
 }
 
+
 /**
  * Closing-element callback, the counterpart of nominatimXMLStart.
  *
  * This script passes the function name to xml_set_element_handler() as the
  * end-element handler. The body is empty, so nothing happens when an
  * element closes.
  *
  * @param mixed  $hParser Parser handle supplied by the XML parser (unused).
  * @param string $sName   Name of the element being closed (unused).
  */
 function nominatimXMLEnd($hParser, $sName)
 {
 }
 /**
  * Closing-element callback, the counterpart of nominatimXMLStart.
  *
  * This script passes the function name to xml_set_element_handler() as the
  * end-element handler. The body is empty, so nothing happens when an
  * element closes.
  *
  * @param mixed  $hParser Parser handle supplied by the XML parser (unused).
  * @param string $sName   Name of the element being closed (unused).
  */
 function nominatimXMLEnd($hParser, $sName)
 {
 }
@@ -369,106 +379,108 @@ if (isset($aCMDResult['link'])) {
 
         $sURL = $sNominatimBaseURL.'?format=xml&accept-language=en';
 
 
         $sURL = $sNominatimBaseURL.'?format=xml&accept-language=en';
 
-        echo "\n-- ".$aRecord['name'].", ".$aRecord['infobox_type']."\n";
+        echo "\n-- ".$aRecord['name'].', '.$aRecord['infobox_type']."\n";
         $fMaxDist = 0.0000001;
         $bUnknown = false;
         switch (strtolower($aRecord['infobox_type'])) {
         $fMaxDist = 0.0000001;
         $bUnknown = false;
         switch (strtolower($aRecord['infobox_type'])) {
-        case 'former country':
-            continue 2;
-        case 'sea':
-            $fMaxDist = 60; // effectively turn it off
-            $sURL .= "&viewbox=".($aRecord['lon']-$fMaxDist).",".($aRecord['lat']+$fMaxDist).",".($aRecord['lon']+$fMaxDist).",".($aRecord['lat']-$fMaxDist);
-            break;
-        case 'country':
-        case 'island':
-        case 'islands':
-        case 'continent':
-            $fMaxDist = 60; // effectively turn it off
-            $sURL .= "&featuretype=country";
-            $sURL .= "&viewbox=".($aRecord['lon']-$fMaxDist).",".($aRecord['lat']+$fMaxDist).",".($aRecord['lon']+$fMaxDist).",".($aRecord['lat']-$fMaxDist);
-            break;
-        case 'prefecture japan':
-            $aRecord['name'] = trim(str_replace(' Prefecture', ' ', $aRecord['name']));
-        case 'state':
-        case '#us state':
-        case 'county':
-        case 'u.s. state':
-        case 'u.s. state symbols':
-        case 'german state':
-        case 'province or territory of canada';
-        case 'indian jurisdiction';
-        case 'province';
-        case 'french region':
-        case 'region of italy':
-        case 'kommune':
-        case '#australia state or territory':
-        case 'russian federal subject':
-            $fMaxDist = 4;
-            $sURL .= "&featuretype=state";
-            $sURL .= "&viewbox=".($aRecord['lon']-$fMaxDist).",".($aRecord['lat']+$fMaxDist).",".($aRecord['lon']+$fMaxDist).",".($aRecord['lat']-$fMaxDist);
-            break;
-        case 'protected area':
-            $fMaxDist = 1;
-            $sURL .= "&nearlat=".$aRecord['lat'];
-            $sURL .= "&nearlon=".$aRecord['lon'];
-            $sURL .= "&viewbox=".($aRecord['lon']-$fMaxDist).",".($aRecord['lat']+$fMaxDist).",".($aRecord['lon']+$fMaxDist).",".($aRecord['lat']-$fMaxDist);
-            break;
-        case 'settlement':
-            $bUnknown = true;
-        case 'french commune':
-        case 'italian comune':
-        case 'uk place':
-        case 'italian comune':
-        case 'australian place':
-        case 'german place':
-        case '#geobox':
-        case 'u.s. county':
-        case 'municipality':
-        case 'city japan':
-        case 'russian inhabited locality':
-        case 'finnish municipality/land area':
-        case 'england county':
-        case 'israel municipality':
-        case 'russian city':
-        case 'city':
-            $fMaxDist = 0.2;
-            $sURL .= "&featuretype=settlement";
-            $sURL .= "&viewbox=".($aRecord['lon']-0.5).",".($aRecord['lat']+0.5).",".($aRecord['lon']+0.5).",".($aRecord['lat']-0.5);
-            break;
-        case 'mountain':
-        case 'mountain pass':
-        case 'river':
-        case 'lake':
-        case 'airport':
-            $fMaxDist = 0.2;
-            $sURL .= "&viewbox=".($aRecord['lon']-0.5).",".($aRecord['lat']+0.5).",".($aRecord['lon']+0.5).",".($aRecord['lat']-0.5);
-
-        case 'ship begin':
-            $fMaxDist = 0.1;
-            $aTypes = array('wreck');
-            $sURL .= "&viewbox=".($aRecord['lon']-0.01).",".($aRecord['lat']+0.01).",".($aRecord['lon']+0.01).",".($aRecord['lat']-0.01);
-            $sURL .= "&nearlat=".$aRecord['lat'];
-            $sURL .= "&nearlon=".$aRecord['lon'];
-            break;
-        case 'road':
-        case 'university':
-        case 'company':
-        case 'department':
-            $fMaxDist = 0.005;
-            $sURL .= "&viewbox=".($aRecord['lon']-0.01).",".($aRecord['lat']+0.01).",".($aRecord['lon']+0.01).",".($aRecord['lat']-0.01);
-            $sURL .= "&bounded=1";
-            $sURL .= "&nearlat=".$aRecord['lat'];
-            $sURL .= "&nearlon=".$aRecord['lon'];
-            break;
-        default:
-            $bUnknown = true;
-            $fMaxDist = 0.005;
-            $sURL .= "&viewbox=".($aRecord['lon']-0.01).",".($aRecord['lat']+0.01).",".($aRecord['lon']+0.01).",".($aRecord['lat']-0.01);
-//              $sURL .= "&bounded=1";
-            $sURL .= "&nearlat=".$aRecord['lat'];
-            $sURL .= "&nearlon=".$aRecord['lon'];
-            echo "-- Unknown: ".$aRecord['infobox_type']."\n";
-            break;
+            case 'former country':
+                continue 2;
+            case 'sea':
+                $fMaxDist = 60; // effectively turn it off
+                $sURL .= '&viewbox='.($aRecord['lon']-$fMaxDist).','.($aRecord['lat']+$fMaxDist).','.($aRecord['lon']+$fMaxDist).','.($aRecord['lat']-$fMaxDist);
+                break;
+            case 'country':
+            case 'island':
+            case 'islands':
+            case 'continent':
+                $fMaxDist = 60; // effectively turn it off
+                $sURL .= '&featuretype=country';
+                $sURL .= '&viewbox='.($aRecord['lon']-$fMaxDist).','.($aRecord['lat']+$fMaxDist).','.($aRecord['lon']+$fMaxDist).','.($aRecord['lat']-$fMaxDist);
+                break;
+            case 'prefecture japan':
+                $aRecord['name'] = trim(str_replace(' Prefecture', ' ', $aRecord['name']));
+                // intentionally no break
+            case 'state':
+            case '#us state':
+            case 'county':
+            case 'u.s. state':
+            case 'u.s. state symbols':
+            case 'german state':
+            case 'province or territory of canada':
+            case 'indian jurisdiction':
+            case 'province':
+            case 'french region':
+            case 'region of italy':
+            case 'kommune':
+            case '#australia state or territory':
+            case 'russian federal subject':
+                $fMaxDist = 4;
+                $sURL .= '&featuretype=state';
+                $sURL .= '&viewbox='.($aRecord['lon']-$fMaxDist).','.($aRecord['lat']+$fMaxDist).','.($aRecord['lon']+$fMaxDist).','.($aRecord['lat']-$fMaxDist);
+                break;
+            case 'protected area':
+                $fMaxDist = 1;
+                $sURL .= '&nearlat='.$aRecord['lat'];
+                $sURL .= '&nearlon='.$aRecord['lon'];
+                $sURL .= '&viewbox='.($aRecord['lon']-$fMaxDist).','.($aRecord['lat']+$fMaxDist).','.($aRecord['lon']+$fMaxDist).','.($aRecord['lat']-$fMaxDist);
+                break;
+            case 'settlement':
+                $bUnknown = true;
+                // intentionally no break
+            case 'french commune':
+            case 'italian comune':
+            case 'uk place':
+            case 'italian comune':
+            case 'australian place':
+            case 'german place':
+            case '#geobox':
+            case 'u.s. county':
+            case 'municipality':
+            case 'city japan':
+            case 'russian inhabited locality':
+            case 'finnish municipality/land area':
+            case 'england county':
+            case 'israel municipality':
+            case 'russian city':
+            case 'city':
+                $fMaxDist = 0.2;
+                $sURL .= '&featuretype=settlement';
+                $sURL .= '&viewbox='.($aRecord['lon']-0.5).','.($aRecord['lat']+0.5).','.($aRecord['lon']+0.5).','.($aRecord['lat']-0.5);
+                break;
+            case 'mountain':
+            case 'mountain pass':
+            case 'river':
+            case 'lake':
+            case 'airport':
+                $fMaxDist = 0.2;
+                $sURL .= '&viewbox='.($aRecord['lon']-0.5).','.($aRecord['lat']+0.5).','.($aRecord['lon']+0.5).','.($aRecord['lat']-0.5);
+                break;
+            case 'ship begin':
+                $fMaxDist = 0.1;
+                $aTypes = array('wreck');
+                $sURL .= '&viewbox='.($aRecord['lon']-0.01).','.($aRecord['lat']+0.01).','.($aRecord['lon']+0.01).','.($aRecord['lat']-0.01);
+                $sURL .= '&nearlat='.$aRecord['lat'];
+                $sURL .= '&nearlon='.$aRecord['lon'];
+                break;
+            case 'road':
+            case 'university':
+            case 'company':
+            case 'department':
+                $fMaxDist = 0.005;
+                $sURL .= '&viewbox='.($aRecord['lon']-0.01).','.($aRecord['lat']+0.01).','.($aRecord['lon']+0.01).','.($aRecord['lat']-0.01);
+                $sURL .= '&bounded=1';
+                $sURL .= '&nearlat='.$aRecord['lat'];
+                $sURL .= '&nearlon='.$aRecord['lon'];
+                break;
+            default:
+                $bUnknown = true;
+                $fMaxDist = 0.005;
+                $sURL .= '&viewbox='.($aRecord['lon']-0.01).','.($aRecord['lat']+0.01).','.($aRecord['lon']+0.01).','.($aRecord['lat']-0.01);
+                // $sURL .= "&bounded=1";
+                $sURL .= '&nearlat='.$aRecord['lat'];
+                $sURL .= '&nearlon='.$aRecord['lon'];
+                echo '-- Unknown: '.$aRecord['infobox_type']."\n";
+                break;
         }
         $sNameURL = $sURL.'&q='.urlencode($aRecord['name']);
 
         }
         $sNameURL = $sURL.'&q='.urlencode($aRecord['name']);
 
@@ -483,7 +495,7 @@ if (isset($aCMDResult['link'])) {
 
         if (!isset($aNominatRecords[0])) {
             $aNameParts = preg_split('#[(,]#', $aRecord['name']);
 
         if (!isset($aNominatRecords[0])) {
             $aNameParts = preg_split('#[(,]#', $aRecord['name']);
-            if (sizeof($aNameParts) > 1) {
+            if (count($aNameParts) > 1) {
                 $sNameURL = $sURL.'&q='.urlencode(trim($aNameParts[0]));
                 var_Dump($sNameURL);
                 $sXML = file_get_contents($sNameURL);
                 $sNameURL = $sURL.'&q='.urlencode(trim($aNameParts[0]));
                 var_Dump($sNameURL);
                 $sXML = file_get_contents($sNameURL);
@@ -492,12 +504,12 @@ if (isset($aCMDResult['link'])) {
                 $hXMLParser = xml_parser_create();
                 xml_set_element_handler($hXMLParser, 'nominatimXMLStart', 'nominatimXMLEnd');
                 xml_parse($hXMLParser, $sXML, true);
                 $hXMLParser = xml_parser_create();
                 xml_set_element_handler($hXMLParser, 'nominatimXMLStart', 'nominatimXMLEnd');
                 xml_parse($hXMLParser, $sXML, true);
-                xml_parser_free($hXMLParser);#
+                xml_parser_free($hXMLParser);
             }
         }
 
         // assume first is best/right
             }
         }
 
         // assume first is best/right
-        for ($i = 0; $i < sizeof($aNominatRecords); $i++) {
+        for ($i = 0; $i < count($aNominatRecords); $i++) {
             $fDiff = ($aRecord['lat']-$aNominatRecords[$i]['LAT']) * ($aRecord['lat']-$aNominatRecords[$i]['LAT']);
             $fDiff += ($aRecord['lon']-$aNominatRecords[$i]['LON']) * ($aRecord['lon']-$aNominatRecords[$i]['LON']);
             $fDiff = sqrt($fDiff);
             $fDiff = ($aRecord['lat']-$aNominatRecords[$i]['LAT']) * ($aRecord['lat']-$aNominatRecords[$i]['LAT']);
             $fDiff += ($aRecord['lon']-$aNominatRecords[$i]['LON']) * ($aRecord['lon']-$aNominatRecords[$i]['LON']);
             $fDiff = sqrt($fDiff);
@@ -514,17 +526,26 @@ if (isset($aCMDResult['link'])) {
                 elseif ($iRank <= 26) $fMaxDist = 0.001;
                 else $fMaxDist = 0.001;
             }
                 elseif ($iRank <= 26) $fMaxDist = 0.001;
                 else $fMaxDist = 0.001;
             }
-            echo "-- FOUND \"".substr($aNominatRecords[$i]['DISPLAY_NAME'], 0, 50)."\", ".$aNominatRecords[$i]['CLASS'].", ".$aNominatRecords[$i]['TYPE'].", ".$aNominatRecords[$i]['PLACE_RANK'].", ".$aNominatRecords[$i]['OSM_TYPE']." (dist:$fDiff, max:$fMaxDist)\n";
+            echo '-- FOUND "'.substr($aNominatRecords[$i]['DISPLAY_NAME'], 0, 50);
+            echo '", '.$aNominatRecords[$i]['CLASS'].', '.$aNominatRecords[$i]['TYPE'];
+            echo ', '.$aNominatRecords[$i]['PLACE_RANK'].', '.$aNominatRecords[$i]['OSM_TYPE'];
+            echo " (dist:$fDiff, max:$fMaxDist)\n";
             if ($fDiff > $fMaxDist) {
                 echo "-- Diff too big $fDiff (max: $fMaxDist)".$aRecord['lat'].','.$aNominatRecords[$i]['LAT'].' & '.$aRecord['lon'].','.$aNominatRecords[$i]['LON']." \n";
             } else {
             if ($fDiff > $fMaxDist) {
                 echo "-- Diff too big $fDiff (max: $fMaxDist)".$aRecord['lat'].','.$aNominatRecords[$i]['LAT'].' & '.$aRecord['lon'].','.$aNominatRecords[$i]['LON']." \n";
             } else {
-                $sSQL = "update wikipedia_article set osm_type=";
+                $sSQL = 'update wikipedia_article set osm_type=';
                 switch ($aNominatRecords[$i]['OSM_TYPE']) {
                 switch ($aNominatRecords[$i]['OSM_TYPE']) {
-                case 'relation': $sSQL .= "'R'"; break;
-                case 'way': $sSQL .= "'W'"; break;
-                case 'node': $sSQL .= "'N'"; break;
+                    case 'relation':
+                        $sSQL .= "'R'";
+                        break;
+                    case 'way':
+                        $sSQL .= "'W'";
+                        break;
+                    case 'node':
+                        $sSQL .= "'N'";
+                        break;
                 }
                 }
-                $sSQL .= ", osm_id=".$aNominatRecords[$i]['OSM_ID']." where language = '".pg_escape_string($aRecord['language'])."' and title = '".pg_escape_string($aRecord['title'])."'";
+                $sSQL .= ', osm_id='.$aNominatRecords[$i]['OSM_ID']." where language = '".pg_escape_string($aRecord['language'])."' and title = '".pg_escape_string($aRecord['title'])."'";
                 $oDB->query($sSQL);
                 break;
             }
                 $oDB->query($sSQL);
                 break;
             }