From 26e30bf8e18f6c7a30cc056972d1399994309270 Mon Sep 17 00:00:00 2001 From: Markus Gail Date: Thu, 10 Mar 2016 16:22:39 +0100 Subject: [PATCH] Implement geocoding and reverse geocoding with tiger interpolation lines instead of points. --- lib/Geocode.php | 106 +++++++++++++----------- lib/PlaceLookup.php | 30 ++++--- lib/ReverseGeocode.php | 16 ++-- lib/lib.php | 4 +- mytests/forward_tiger_functional.py | 43 ++++++++++ mytests/forward_tiger_time.py | 45 +++++++++++ mytests/random_points_bbox.py | 7 ++ mytests/reverse_tiger_functional.py | 49 ++++++++++++ mytests/reverse_tiger_time.py | 31 +++++++ settings/settings.php | 2 +- settings/settings.php~ | 120 ++++++++++++++++++++++++++++ sql/functions.sql | 22 ++--- tests/features/api/details.feature | 1 - tests/features/api/tiger.feature | 50 ++++++++++++ 14 files changed, 450 insertions(+), 76 deletions(-) create mode 100644 mytests/forward_tiger_functional.py create mode 100644 mytests/forward_tiger_time.py create mode 100644 mytests/random_points_bbox.py create mode 100644 mytests/reverse_tiger_functional.py create mode 100644 mytests/reverse_tiger_time.py create mode 100644 settings/settings.php~ create mode 100644 tests/features/api/tiger.feature diff --git a/lib/Geocode.php b/lib/Geocode.php index a6f8e0e5..b57b6f95 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -44,10 +44,6 @@ protected $sQuery = false; protected $aStructuredQuery = false; - - //for Tiger housenumber interpolation - protected $searchedHousenumber=-1; - protected $housenumberFound=false; function Geocode(&$oDB) { @@ -395,19 +391,20 @@ function getDetails($aPlaceIDs) { + //$aPlaceIDs is an array with key: placeID and value: tiger-housenumber, if found, else -1 if (sizeof($aPlaceIDs) == 0) return array(); $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$this->aLangPrefOrder))."]"; // Get the details for display (is this a redundant extra step?) 
- $sPlaceIDs = join(',',$aPlaceIDs); + $sPlaceIDs = join(',',array_keys($aPlaceIDs)); $sImportanceSQL = ''; if ($this->sViewboxSmallSQL) $sImportanceSQL .= " case when ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; if ($this->sViewboxLargeSQL) $sImportanceSQL .= " case when ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; $sSQL = "select osm_type,osm_id,class,type,admin_level,rank_search,rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id, calculated_country_code as country_code,"; - $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; + $sSQL .= "get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,"; $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,"; if ($this->bIncludeExtraTags) $sSQL .= "hstore_to_json(extratags)::text as extra,"; @@ -434,32 +431,42 @@ if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { - //query also location_property_tiger_line and location_property_aux - //Tiger search only if it was searched for a housenumber (searchedHousenumber >=0) and if it was found (housenumberFound = true) + //query also location_property_tiger and location_property_aux + //Tiger search only if a housenumber was searched and if it was found (i.e. 
aPlaceIDs[placeID] = housenumber != -1) (realized through a join) //only Tiger housenumbers need to be interpolated, because they are saved as lines with start- and endnumber, the common osm housenumbers are usually saved as points - if($this->searchedHousenumber>=0 && $this->housenumberFound){ - $sSQL .= "union "; - $sSQL .= "select 'T' as osm_type, place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code"; - $sSQL .= ", get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress "; - $sSQL .= ", null as placename"; - $sSQL .= ", null as ref"; - if ($this->bIncludeExtraTags) $sSQL .= ", null as extra"; - if ($this->bIncludeNameDetails) $sSQL .= ", null as names"; - $sSQL .= ", avg(st_x(point)) as lon, avg(st_y(point)) as lat"; - $sSQL .= $sImportanceSQL.", -1.15 as importance "; - $sSQL .= ", 1.0 as addressimportance "; //not sure how the addressimportance is/should be calculated for Tiger data - $sSQL .= ", null as extra_place "; - $sSQL .= " from (select place_id"; - //interpolate the Tiger housenumbers here - $sSQL .= ",ST_LineInterpolatePoint(linegeo, ($this->searchedHousenumber::float-startnumber::float)/(endnumber-startnumber)::float) as point, parent_place_id "; - $sSQL .= "from location_property_tiger_line where place_id in ($sPlaceIDs) "; - $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank) as blub"; //postgres wants an alias here - $sSQL .= " group by place_id"; //why group by place_id, isnt place_id unique? 
- if (!$this->bDeDupe) $sSQL .= ",place_id "; + $sHousenumbers = ""; + $i=0; + $length=count($aPlaceIDs); + foreach($aPlaceIDs as $placeID => $housenumber){ + $i++; + $sHousenumbers .= "(".$placeID.",".$housenumber.")"; + if($i<$length) + $sHousenumbers .= ","; } + + $sSQL .= "union "; + $sSQL .= "select 'T' as osm_type, place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code"; + $sSQL .= ", get_address_by_language(place_id, housenumber_for_place, $sLanguagePrefArraySQL) as langaddress "; + $sSQL .= ", null as placename"; + $sSQL .= ", null as ref"; + if ($this->bIncludeExtraTags) $sSQL .= ", null as extra"; + if ($this->bIncludeNameDetails) $sSQL .= ", null as names"; + $sSQL .= ", avg(st_x(point)) as lon, avg(st_y(point)) as lat"; + $sSQL .= $sImportanceSQL.", -1.15 as importance "; + $sSQL .= ", (select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(blub.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance "; + $sSQL .= ", null as extra_place "; + $sSQL .= " from (select place_id"; + //interpolate the Tiger housenumbers here + $sSQL .= ",ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) as point, parent_place_id, housenumber_for_place "; + $sSQL .= "from (location_property_tiger "; + $sSQL .= " join (values ".$sHousenumbers.") as housenumbers(place_id,housenumber_for_place) using(place_id)) "; + $sSQL .= " where housenumber_for_place>=0 and 30 between $this->iMinAddressRank and $this->iMaxAddressRank) as blub"; //postgres wants an alias here + $sSQL .= " group by place_id, housenumber_for_place"; //is this group by really needed?, place_id + housenumber (in combination) are unique + if (!$this->bDeDupe) $sSQL .= ",place_id "; + $sSQL .= " union "; $sSQL 
.= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code,"; - $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; + $sSQL .= "get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; if ($this->bIncludeExtraTags) $sSQL .= "null as extra,"; @@ -472,7 +479,7 @@ $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; $sSQL .= "group by place_id"; if (!$this->bDeDupe) $sSQL .= ",place_id"; - $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; + $sSQL .= ",get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) "; } $sSQL .= " order by importance desc"; @@ -1089,6 +1096,7 @@ // TODO: suggestions // Start the search process + // array with: placeid => -1 | tiger-housenumber $aResultPlaceIDs = array(); $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases); @@ -1206,6 +1214,7 @@ foreach($aSearches as $aSearch) { $iQueryLoop++; + $searchedHousenumber=-1; if (CONST_Debug) { echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; } if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens); @@ -1431,18 +1440,18 @@ if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = $this->oDB->getCol($sSQL); } - //if nothing was found in placex or location_property_aux, then search in Tiger data for this housenumber(location_property_tiger_line) + //if nothing was found in placex or location_property_aux, then search in Tiger data for this housenumber(location_property_tiger) + $searchedHousenumber = intval($aSearch['sHouseNumber']); if (!sizeof($aPlaceIDs)) { //$sSQL = "select place_id from location_property_tiger where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'"; //new query for lines, not housenumbers anymore - $this->searchedHousenumber = intval($aSearch['sHouseNumber']); - if($this->searchedHousenumber%2==0){ + if($searchedHousenumber%2==0){ //if housenumber is even, look for housenumber in streets with interpolationtype even or all - $sSQL = "select distinct place_id from location_property_tiger_line where parent_place_id in (".$sPlaceIDs.") and (interpolationtype='even' or interpolationtype='all') and ".$this->searchedHousenumber.">=startnumber and ".$this->searchedHousenumber."<=endnumber"; + $sSQL = "select distinct place_id from location_property_tiger where parent_place_id in (".$sPlaceIDs.") and (interpolationtype='even' or interpolationtype='all') and ".$searchedHousenumber.">=startnumber and ".$searchedHousenumber."<=endnumber"; }else{ //look for housenumber in streets with interpolationtype odd or all - $sSQL = "select distinct place_id from location_property_tiger_line where parent_place_id in (".$sPlaceIDs.") and (interpolationtype='odd' or interpolationtype='all') and ".$this->searchedHousenumber.">=startnumber and ".$this->searchedHousenumber."<=endnumber"; + $sSQL = "select distinct place_id from location_property_tiger where parent_place_id in 
(".$sPlaceIDs.") and (interpolationtype='odd' or interpolationtype='all') and ".$searchedHousenumber.">=startnumber and ".$searchedHousenumber."<=endnumber"; } if (sizeof($this->aExcludePlaceIDs)) @@ -1459,11 +1468,10 @@ if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber'])) { $aPlaceIDs = $aRoadPlaceIDs; - //set to false, if no housenumbers were found - $this->housenumberFound=false; + //set to -1, if no housenumbers were found + $searchedHousenumber=-1; }else{ - //housenumber was found - $this->housenumberFound=true; + //housenumber was found, remains saved in searchedHousenumber } } @@ -1594,7 +1602,8 @@ foreach($aPlaceIDs as $iPlaceID) { - $aResultPlaceIDs[$iPlaceID] = $iPlaceID; + // array for placeID => -1 | Tiger housenumber + $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber; } if ($iQueryLoop > 20) break; } @@ -1602,16 +1611,22 @@ if (isset($aResultPlaceIDs) && sizeof($aResultPlaceIDs) && ($this->iMinAddressRank != 0 || $this->iMaxAddressRank != 30)) { // Need to verify passes rank limits before dropping out of the loop (yuk!) 
- $sSQL = "select place_id from placex where place_id in (".join(',',$aResultPlaceIDs).") "; + // reduces the number of place id, like a filter + $sSQL = "select place_id from placex where place_id in (".join(',',array_keys($aResultPlaceIDs)).") "; $sSQL .= "and (placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) $sSQL .= " OR (extratags->'place') = 'city'"; if ($this->aAddressRankList) $sSQL .= " OR placex.rank_address in (".join(',',$this->aAddressRankList).")"; - $sSQL .= ") UNION select place_id from location_property_tiger where place_id in (".join(',',$aResultPlaceIDs).") "; + $sSQL .= ") UNION select place_id from location_property_tiger where place_id in (".join(',',array_keys($aResultPlaceIDs)).") "; $sSQL .= "and (30 between $this->iMinAddressRank and $this->iMaxAddressRank "; if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',',$this->aAddressRankList).")"; $sSQL .= ")"; if (CONST_Debug) var_dump($sSQL); - $aResultPlaceIDs = $this->oDB->getCol($sSQL); + $aFilteredPlaceIDs = $this->oDB->getCol($sSQL); + $tempIDs = array(); + foreach($aFilteredPlaceIDs as $placeID){ + $tempIDs[$placeID]= $aResultPlaceIDs[$placeID]; //assign housenumber to placeID + } + $aResultPlaceIDs=$tempIDs; } //exit; @@ -1796,17 +1811,16 @@ { $aResult['label'] = $aClassType[$aResult['class'].':'.$aResult['type']]['label']; } - /* Implement this function later. 
if tag '&addressdetails=1' is set in query + // if tag '&addressdetails=1' is set in query if ($this->bIncludeAddressDetails) { - * getAddressDetails() is defined in lib.php and uses the SQL function get_addressdata in functions.sql - $aResult['address'] = getAddressDetails($this->oDB, $sLanguagePrefArraySQL, $aResult['place_id'], $aResult['country_code']); + // getAddressDetails() is defined in lib.php and uses the SQL function get_addressdata in functions.sql + $aResult['address'] = getAddressDetails($this->oDB, $sLanguagePrefArraySQL, $aResult['place_id'], $aResult['country_code'], $aResultPlaceIDs[$aResult['place_id']]); if ($aResult['extra_place'] == 'city' && !isset($aResult['address']['city'])) { $aResult['address'] = array_merge(array('city' => array_shift(array_values($aResult['address']))), $aResult['address']); } } - */ if ($this->bIncludeExtraTags) { if ($aResult['extra']) diff --git a/lib/PlaceLookup.php b/lib/PlaceLookup.php index c5129fee..261b41c0 100644 --- a/lib/PlaceLookup.php +++ b/lib/PlaceLookup.php @@ -6,6 +6,8 @@ protected $iPlaceID; protected $sType = false; + + protected $fTigerFraction =-1; protected $aLangPrefOrder = array(); @@ -65,6 +67,7 @@ { $this->setOSMID($details['osm_type'], $details['osm_id']); } + if (isset($details['fraction'])) $this->fTigerFraction = $details['fraction']; return $this->lookup(); } @@ -80,20 +83,24 @@ $sSQL = "select place_id,partition, 'T' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, housenumber, null as street, null as isin, postcode,"; $sSQL .= " 'us' as country_code, parent_place_id, null as linked_place_id, 30 as rank_address, 30 as rank_search,"; $sSQL .= " coalesce(null,0.75-(30::float/40)) as importance, null as indexed_status, null as indexed_date, null as wikipedia, 'us' as calculated_country_code, "; - $sSQL .= " get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; + $sSQL .= " get_address_by_language(place_id, 
housenumber,$sLanguagePrefArraySQL) as langaddress,"; $sSQL .= " null as placename,"; $sSQL .= " null as ref,"; if ($this->bExtraTags) $sSQL .= " null as extra,"; if ($this->bNameDetails) $sSQL .= " null as names,"; - $sSQL .= " st_y(centroid) as lat,"; - $sSQL .= " st_x(centroid) as lon"; - $sSQL .= " from location_property_tiger where place_id = ".(int)$this->iPlaceID; + $sSQL .= " ST_X(point) as lon, ST_Y(point) as lat from (select *, ST_LineInterpolatePoint(linegeo, (housenumber-startnumber::float)/(endnumber-startnumber)::float) as point from "; + $sSQL .= " (select *, "; + $sSQL .= " CASE WHEN interpolationtype='odd' THEN floor((".$this->fTigerFraction."*(endnumber-startnumber)+startnumber)/2)::int*2+1"; + $sSQL .= " WHEN interpolationtype='even' THEN ((".$this->fTigerFraction."*(endnumber-startnumber)+startnumber+1)/2)::int*2"; + $sSQL .= " WHEN interpolationtype='all' THEN (".$this->fTigerFraction."*(endnumber-startnumber)+startnumber)::int"; + $sSQL .= " END as housenumber"; + $sSQL .= " from location_property_tiger where place_id = ".(int)$this->iPlaceID.") as blub1) as blub2"; } else { $sSQL = "select placex.place_id, partition, osm_type, osm_id, class, type, admin_level, housenumber, street, isin, postcode, country_code, parent_place_id, linked_place_id, rank_address, rank_search, "; $sSQL .= " coalesce(importance,0.75-(rank_search::float/40)) as importance, indexed_status, indexed_date, wikipedia, calculated_country_code, "; - $sSQL .= " get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; + $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= " get_name_by_language(name, $sLanguagePrefArraySQL) as placename,"; $sSQL .= " get_name_by_language(name, ARRAY['ref']) as ref,"; if ($this->bExtraTags) $sSQL .= " hstore_to_json(extratags) as extra,"; @@ -115,7 +122,10 @@ if ($this->bAddressDetails) { - $aAddress = $this->getAddressNames(); + if($this->sType == 'tiger') // to get 
addressdetails for tiger data, the housenumber is needed + $aAddress = $this->getAddressNames($aPlace['housenumber']); + else + $aAddress = $this->getAddressNames(); $aPlace['aAddress'] = $aAddress; } @@ -163,13 +173,13 @@ return $aPlace; } - function getAddressDetails($bAll = false) + function getAddressDetails($bAll = false, $housenumber = -1) { if (!$this->iPlaceID) return null; $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted", $this->aLangPrefOrder))."]"; - $sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(".$this->iPlaceID.")"; + $sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(".$this->iPlaceID.",".$housenumber.")"; if (!$bAll) $sSQL .= " WHERE isaddress OR type = 'country_code'"; $sSQL .= " order by rank_address desc,isaddress desc"; @@ -182,9 +192,9 @@ return $aAddressLines; } - function getAddressNames() + function getAddressNames($housenumber = -1) { - $aAddressLines = $this->getAddressDetails(false); + $aAddressLines = $this->getAddressDetails(false, $housenumber); $aAddress = array(); $aFallback = array(); diff --git a/lib/ReverseGeocode.php b/lib/ReverseGeocode.php index e40ce6cc..a296114f 100644 --- a/lib/ReverseGeocode.php +++ b/lib/ReverseGeocode.php @@ -110,15 +110,15 @@ if ($bIsInUnitedStates && $iMaxRank_orig >= 28 && $iPlaceID && ($aPlace['rank_search'] == 26 || $aPlace['rank_search'] == 27 )) { $fSearchDiam = 0.001; - $sSQL = 'SELECT place_id,parent_place_id,30 as rank_search '; - if (CONST_Debug) { $sSQL .= ', housenumber, ST_distance('.$sPointSQL.', centroid) as distance, st_y(centroid) as lat, st_x(centroid) as lon'; } + $sSQL = 'SELECT place_id,parent_place_id,30 as rank_search, ST_line_locate_point(linegeo,'.$sPointSQL.') as fraction'; + //if (CONST_Debug) { $sSQL .= ', housenumber, ST_distance('.$sPointSQL.', centroid) as distance, st_y(centroid) as lat, st_x(centroid) as lon'; } $sSQL .= ' FROM 
location_property_tiger WHERE parent_place_id = '.$iPlaceID; - $sSQL .= ' AND ST_DWithin('.$sPointSQL.', centroid, '.$fSearchDiam.')'; - $sSQL .= ' ORDER BY ST_distance('.$sPointSQL.', centroid) ASC limit 1'; + $sSQL .= ' AND ST_DWithin('.$sPointSQL.', linegeo, '.$fSearchDiam.')'; //no centroid anymore in Tiger data, now we have lines + $sSQL .= ' ORDER BY ST_distance('.$sPointSQL.', linegeo) ASC limit 1'; // print all house numbers in the parent (street) - if (CONST_Debug) + /*if (CONST_Debug) { $sSQL = preg_replace('/limit 1/', 'limit 100', $sSQL); var_dump($sSQL); @@ -128,7 +128,7 @@ { echo $i['housenumber'] . ' | ' . $i['distance'] * 1000 . ' | ' . $i['lat'] . ' | ' . $i['lon']. ' | '. "
\n"; } - } + }*/ $aPlaceTiger = $this->oDB->getRow($sSQL); if (PEAR::IsError($aPlace)) @@ -142,6 +142,7 @@ $aPlace = $aPlaceTiger; $iPlaceID = $aPlaceTiger['place_id']; $iParentPlaceID = $aPlaceTiger['parent_place_id']; // the street + $iFraction = $aPlaceTiger['fraction']; } } @@ -165,7 +166,8 @@ } return array('place_id' => $iPlaceID, - 'type' => $bPlaceIsTiger ? 'tiger' : 'osm'); + 'type' => $bPlaceIsTiger ? 'tiger' : 'osm', + 'fraction' => $bPlaceIsTiger ? $iFraction : -1 ); } } ?> diff --git a/lib/lib.php b/lib/lib.php index e81d85be..d390a482 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -780,9 +780,9 @@ } - function getAddressDetails(&$oDB, $sLanguagePrefArraySQL, $iPlaceID, $sCountryCode = false, $bRaw = false) + function getAddressDetails(&$oDB, $sLanguagePrefArraySQL, $iPlaceID, $sCountryCode = false, $housenumber, $bRaw = false) { - $sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID)"; + $sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID, $housenumber)"; if (!$bRaw) $sSQL .= " WHERE isaddress OR type = 'country_code'"; $sSQL .= " order by rank_address desc,isaddress desc"; diff --git a/mytests/forward_tiger_functional.py b/mytests/forward_tiger_functional.py new file mode 100644 index 00000000..c12e3998 --- /dev/null +++ b/mytests/forward_tiger_functional.py @@ -0,0 +1,43 @@ +import numpy as np +import urllib2 as url +import json as json +import random_points_bbox +import time + +def test(num): + #first get some random points in the bbox + aPoints = random_points_bbox.getPoints(num, -100.815, 46.789, -100.717, 46.84) + #get the addresses + sReverseUrl = "http://localhost/nominatim/reverse.php?format=json&lat=%f&lon=%f" + aAddresses = [] + for point in aPoints: + response = url.urlopen(sReverseUrl % (point[1], point[0])) + aAddresses.append(json.load(response)['address']) + #print aAddresses + # now we have all the addresses of the 
points in a list + # lets forward geocode this list + sOldUrl = "http://localhost/nominatim_old/search.php?format=json&city=%s&street=%s&addressdetails=1" + sLineUrl = "http://localhost/nominatim/search.php?format=json&city=%s&street=%s&addressdetails=1" + diff_lat =0 + diff_lon =0 + points =0 + for address in aAddresses: + if 'house_number' in address and 'road' in address: + responseOld = url.urlopen(sOldUrl % (address['city'], address['house_number']+' '+address['road'])) + dataOld = json.load(responseOld) + print dataOld[0]['display_name'] + responseLine = url.urlopen(sLineUrl % (address['city'], address['house_number']+' '+address['road'])) + dataLine = json.load(responseLine) + print dataLine[0]['display_name'] + temp_diff_lat = np.abs(float(dataOld[0]['lat'])-float(dataLine[0]['lat'])) + temp_diff_lon = np.abs(float(dataOld[0]['lon'])-float(dataLine[0]['lon'])) + print "diff lat: "+str(temp_diff_lat*111166)+", diff lon: "+str(temp_diff_lon*250456) + diff_lat += temp_diff_lat + diff_lon += temp_diff_lon + points +=1 + + print "Average difference in lat degrees with %d elements: %f (meters: %f)" % (points, diff_lat/points, diff_lat/points*111166) + print "Average difference in lon degrees with %d elements: %f (meters: %f)" % (points, diff_lon/points, diff_lon/points*250456) + # at 46.8 deg: 1 deg lat=111.166 km, 1 deg lon=250.456 km -- NOTE(review): 1 deg lon at this latitude should be roughly 76 km (111.32 km * cos(46.8 deg)); verify the 250456 factor + +test(20) diff --git a/mytests/forward_tiger_time.py b/mytests/forward_tiger_time.py new file mode 100644 index 00000000..26fb5901 --- /dev/null +++ b/mytests/forward_tiger_time.py @@ -0,0 +1,45 @@ +import numpy as np +import urllib2 as url +import json as json +import random_points_bbox +import time + +def test(num): + #first get some random points in the bbox + aPoints = random_points_bbox.getPoints(num, -100.815, 46.789, -100.717, 46.84) + #get the addresses + sReverseUrl = "http://localhost/nominatim_old/reverse.php?format=json&lat=%f&lon=%f" + aAddresses = [] + for point in aPoints: + response = url.urlopen(sReverseUrl % 
(point[1], point[0])) + aAddresses.append(json.load(response)['address']) + #print aAddresses + # now we have all the addresses of the points in a list + # lets forward geocode this list + sOldUrl = "http://localhost/nominatim_old/search.php?format=json&city=%s&street=%s&addressdetails=1" + sLineUrl = "http://localhost/nominatim/search.php?format=json&city=%s&street=%s&addressdetails=1" + start_old = time.time() + for address in aAddresses: + if 'house_number' in address and 'road' in address: + responseOld = url.urlopen(sOldUrl % (address['city'], address['house_number']+' '+address['road'])) + #dataOld = json.load(responseOld) + #print dataOld[0]['display_name'] + elif 'road' in address: + responseOld = url.urlopen(sOldUrl % (address['city'], address['road'])) + #dataOld = json.load(responseOld) + #print dataOld[0]['display_name'] + end_old = time.time() + for address in aAddresses: + if 'house_number' in address and 'road' in address: + responseLine = url.urlopen(sLineUrl % (address['city'], address['house_number']+' '+address['road'])) + elif 'road' in address: + responseLine = url.urlopen(sLineUrl % (address['city'], address['road'])) + end_line = time.time() + + print "Seconds old search for %d elements: %f" % (num,end_old-start_old) + print "Seconds line search for %d elements: %f" % (num,end_line-end_old) + + +test(100) +# 100 points: old: 7.11 sec, new: 7.47 sec +# 1000 points: old: 65.69 sec, new: 66.96 sec diff --git a/mytests/random_points_bbox.py b/mytests/random_points_bbox.py new file mode 100644 index 00000000..b8270794 --- /dev/null +++ b/mytests/random_points_bbox.py @@ -0,0 +1,7 @@ +import numpy as np + +def getPoints(num, sw_lng, sw_lat, ne_lng, ne_lat): + aResult = np.empty(shape=(num,2)) + for i in range(0,num): + aResult[i] = [np.random.uniform(ne_lng, sw_lng), np.random.uniform(sw_lat, ne_lat)] + return aResult diff --git a/mytests/reverse_tiger_functional.py b/mytests/reverse_tiger_functional.py new file mode 100644 index 00000000..ed2a946c 
--- /dev/null +++ b/mytests/reverse_tiger_functional.py @@ -0,0 +1,49 @@ +import numpy as np +import urllib2 as url +import json as json +import random_points_bbox + +def test_compare(strUrl1, strUrl2, iPoints): + #define bounding box for test + # sw: left-lower corner + sw_lng= -100.815 + sw_lat= 46.789 + # ne right-top corner + ne_lng= -100.717 + ne_lat= 46.84 + #first get some random points in the bbox + aPoints = random_points_bbox.getPoints(iPoints, -100.815, 46.789, -100.717, 46.84) + same = 0 + differ = 0 + differ_street=0 + missing_housenumber_1=0 + missing_housenumber_2=0 + for point in aPoints: + response = url.urlopen( strUrl1 % (point[1],point[0])) + data1 = json.load(response) + response = url.urlopen(strUrl2 % (point[1],point[0])) + data2 = json.load(response) + if data1['address'] == data2['address']: + same+=1 + elif 'road' in data1['address'] and 'road' in data2['address']: + differ+=1 + print 'different: '+str(data1['address'])+' - ' + str(data2['address']) + if data1['address']['road'] != data2['address']['road']: + differ_street +=1 + if 'house_number' not in data1['address']: + missing_housenumber_1 +=1 + print 'missing housenumber in Line: '+str(data1['address']) + if 'house_number' not in data2['address']: + missing_housenumber_2 +=1 + print 'missing housenumber in Old: '+str(data2['address']) + + + print 'Number of same values: '+str(same) + print 'Number of different values: '+str(differ) + print 'Number of different streets: '+str(differ_street) + print 'Points without housenumber in Line: '+str(missing_housenumber_1) + print 'Points without housenumber in Old: '+str(missing_housenumber_2) +strUrlLine = "http://localhost/nominatim/reverse.php?format=json&lat=%f&lon=%f" +strUrlOld = "http://localhost/nominatim_old/reverse.php?format=json&lat=%f&lon=%f" + +test_compare(strUrlLine,strUrlOld, 100) diff --git a/mytests/reverse_tiger_time.py b/mytests/reverse_tiger_time.py new file mode 100644 index 00000000..afef6b42 --- /dev/null +++ 
b/mytests/reverse_tiger_time.py @@ -0,0 +1,31 @@ +import numpy as np +import urllib2 as url +import time + +def test(strUrl, iPoints): + #define bounding box for test + # sw: left-lower corner + sw_lng= -100.815 + sw_lat= 46.789 + # ne right-top corner + ne_lng= -100.717 + ne_lat= 46.84 + aXvalues = np.linspace(ne_lng, sw_lng, num=iPoints) + aYvalues = np.linspace(sw_lat, ne_lat, num=iPoints) + for x in aXvalues: + for y in aYvalues: + url.urlopen( strUrl % (y,x)) + +strUrlLine = "http://localhost/nominatim/reverse.php?format=json&lat=%f&lon=%f" +start_time_line=time.time() +test(strUrlLine, 10) +end_time_line=time.time() +strUrlOld = "http://localhost/nominatim_old/reverse.php?format=json&lat=%f&lon=%f" +start_time_old=time.time() +test(strUrlOld, 10) +end_time_old=time.time() +print("Line: --- %s seconds ---" % (end_time_line-start_time_line)) +print("Old: --- %s seconds ---" % (end_time_old-start_time_old)) + +#tested on 9th March 2016: Line: 354 seconds, Old: 363 seconds (with iPoints=100 => 10,000 single points) +# Line: 3.586 sec, Old: 3.643 sec (with iPoints=10 => 100 single points) diff --git a/settings/settings.php b/settings/settings.php index 21033ff3..a054eb25 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -4,7 +4,7 @@ // General settings @define('CONST_Debug', false); - @define('CONST_Database_DSN', 'pgsql://@/nominatim'); // <driver>://<username>:<password>@<host>:<port>/<database> + @define('CONST_Database_DSN', 'pgsql://@/nominatim_lines'); // <driver>://<username>:<password>@<host>:<port>/<database> @define('CONST_Database_Web_User', 'www-data'); @define('CONST_Max_Word_Frequency', '50000'); @define('CONST_Limit_Reindexing', true); diff --git a/settings/settings.php~ b/settings/settings.php~ new file mode 100644 index 00000000..a054eb25 --- /dev/null +++ b/settings/settings.php~ @@ -0,0 +1,120 @@ +://:@:/ + @define('CONST_Database_Web_User', 'www-data'); + @define('CONST_Max_Word_Frequency', '50000'); + @define('CONST_Limit_Reindexing', true); + + // Proxy settings + @define('CONST_HTTP_Proxy', false); + 
@define('CONST_HTTP_Proxy_Host', 'proxy.mydomain.com'); + @define('CONST_HTTP_Proxy_Port', '3128'); + @define('CONST_HTTP_Proxy_Login', ''); + @define('CONST_HTTP_Proxy_Password', ''); + + // Software versions + @define('CONST_Postgresql_Version', '9.3'); // values: 9.0, ... , 9.4 + @define('CONST_Postgis_Version', '2.1'); // values: 1.5, 2.0, 2.1 + + // Paths + @define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/'.CONST_Postgresql_Version.'/contrib'); + @define('CONST_Path_Postgresql_Postgis', CONST_Path_Postgresql_Contrib.'/postgis-'.CONST_Postgis_Version); + @define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql'); + @define('CONST_Osmosis_Binary', '/usr/bin/osmosis'); + @define('CONST_Tiger_Data_Path', CONST_BasePath.'/data/tiger'); + + // osm2pgsql settings + @define('CONST_Osm2pgsql_Flatnode_File', null); + + // tablespace settings + // osm2pgsql caching tables (aka slim mode tables) - update only + @define('CONST_Tablespace_Osm2pgsql_Data', false); + @define('CONST_Tablespace_Osm2pgsql_Index', false); + // osm2pgsql output tables (aka main table) - update only + @define('CONST_Tablespace_Place_Data', false); + @define('CONST_Tablespace_Place_Index', false); + // address computation tables - update only + @define('CONST_Tablespace_Address_Data', false); + @define('CONST_Tablespace_Address_Index', false); + // search tables - needed for lookups + @define('CONST_Tablespace_Search_Data', false); + @define('CONST_Tablespace_Search_Index', false); + // additional data, e.g. 
TIGER data, type searches - needed for lookups + @define('CONST_Tablespace_Aux_Data', false); + @define('CONST_Tablespace_Aux_Index', false); + + // Replication settings + @define('CONST_Replication_Url', 'http://planet.openstreetmap.org/replication/minute'); + @define('CONST_Replication_MaxInterval', '3600'); + @define('CONST_Replication_Update_Interval', '60'); // How often upstream publishes diffs + @define('CONST_Replication_Recheck_Interval', '60'); // How long to sleep if no update found yet + + // Connection buckets to rate limit people being nasty + @define('CONST_ConnectionBucket_MemcacheServerAddress', false); + @define('CONST_ConnectionBucket_MemcacheServerPort', 11211); + @define('CONST_ConnectionBucket_MaxBlockList', 100); + @define('CONST_ConnectionBucket_LeakRate', 1); + @define('CONST_ConnectionBucket_BlockLimit', 10); + @define('CONST_ConnectionBucket_WaitLimit', 6); + @define('CONST_ConnectionBucket_MaxSleeping', 10); + @define('CONST_ConnectionBucket_Cost_Reverse', 1); + @define('CONST_ConnectionBucket_Cost_Search', 2); + @define('CONST_ConnectionBucket_Cost_Details', 3); + @define('CONST_ConnectionBucket_Cost_Status', 1); + + // Override this function to add an adjustment factor to the cost + // based on server load. e.g. 
getBlockingProcesses + if (!function_exists('user_busy_cost')) + { + function user_busy_cost() + { + return 0; + } + } + + // Website settings + @define('CONST_NoAccessControl', true); + @define('CONST_ClosedForIndexing', false); + @define('CONST_ClosedForIndexingExceptionIPs', ''); + @define('CONST_BlockedIPs', ''); + @define('CONST_BulkUserIPs', ''); + @define('CONST_BlockMessage', ''); // additional info to show for blocked IPs + + @define('CONST_Website_BaseURL', 'http://localhost/nominatim/'); + @define('CONST_Tile_Default', 'Mapnik'); + + @define('CONST_Default_Language', false); + @define('CONST_Default_Lat', 20.0); + @define('CONST_Default_Lon', 0.0); + @define('CONST_Default_Zoom', 2); + @define('CONST_Map_Tile_URL', 'http://{s}.tile.osm.org/{z}/{x}/{y}.png'); + @define('CONST_Map_Tile_Attribution', ''); // Set if tile source isn't osm.org + + @define('CONST_Search_AreaPolygons_Enabled', true); + @define('CONST_Search_AreaPolygons', true); + + @define('CONST_Search_BatchMode', false); + + @define('CONST_Search_TryDroppedAddressTerms', false); + @define('CONST_Search_NameOnlySearchFrequencyThreshold', 500); + // If set to true, then reverse order of queries will be tried by default. + // When set to false, only selected languages allow reverse search. 
+ @define('CONST_Search_ReversePlanForAll', true); + + @define('CONST_Places_Max_ID_count', 50); + + // Set to zero to disable polygon output + @define('CONST_PolygonOutput_MaximumTypes', 1); + + // Log settings + @define('CONST_Log_DB', true); + @define('CONST_Log_File', false); + @define('CONST_Log_File_Format', 'TODO'); // Currently hard coded + @define('CONST_Log_File_SearchLog', ''); + @define('CONST_Log_File_ReverseLog', ''); + + diff --git a/sql/functions.sql b/sql/functions.sql index bd64697a..e060a265 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -2183,7 +2183,8 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT, languagepref TEXT[]) RETURNS TEXT +--housenumber only needed for tiger data +CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT, housenumber INTEGER, languagepref TEXT[]) RETURNS TEXT AS $$ DECLARE result TEXT[]; @@ -2195,7 +2196,7 @@ BEGIN result := '{}'; prevresult := ''; - FOR location IN select * from get_addressdata(for_place_id) where isaddress order by rank_address desc LOOP + FOR location IN select * from get_addressdata(for_place_id,housenumber) where isaddress order by rank_address desc LOOP currresult := trim(get_name_by_language(location.name, languagepref)); IF currresult != prevresult AND currresult IS NOT NULL AND result[(100 - location.rank_address)] IS NULL THEN result[(100 - location.rank_address)] := trim(get_name_by_language(location.name, languagepref)); @@ -2223,10 +2224,10 @@ create type addressline as ( distance FLOAT ); -CREATE OR REPLACE FUNCTION get_addressdata(in_place_id BIGINT) RETURNS setof addressline +CREATE OR REPLACE FUNCTION get_addressdata(in_place_id BIGINT, in_housenumber INTEGER) RETURNS setof addressline AS $$ DECLARE - for_place_id BIGINT; + for_place_id BIGINT;--parent_place_id result TEXT[]; search TEXT[]; found INTEGER; @@ -2242,11 +2243,14 @@ DECLARE countryname HSTORE; hadcountry BOOLEAN; BEGIN - - select 
parent_place_id,'us', housenumber, 30, postcode, null, 'place', 'house' from location_property_tiger - WHERE place_id = in_place_id - INTO for_place_id,searchcountrycode, searchhousenumber, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; - + --first query tiger data + select parent_place_id,'us', 30, postcode, null, 'place', 'house' from location_property_tiger + WHERE place_id = in_place_id AND in_housenumber>=startnumber AND in_housenumber <= endnumber + INTO for_place_id,searchcountrycode, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; + IF for_place_id IS NOT NULL THEN + searchhousenumber = in_housenumber::text; + END IF; + IF for_place_id IS NULL THEN select parent_place_id,'us', housenumber, 30, postcode, null, 'place', 'house' from location_property_aux WHERE place_id = in_place_id diff --git a/tests/features/api/details.feature b/tests/features/api/details.feature index fd0b0e01..e59659c3 100644 --- a/tests/features/api/details.feature +++ b/tests/features/api/details.feature @@ -11,4 +11,3 @@ Feature: Object details | N158845944 | W72493656 | R62422 - diff --git a/tests/features/api/tiger.feature b/tests/features/api/tiger.feature new file mode 100644 index 00000000..a5db90f3 --- /dev/null +++ b/tests/features/api/tiger.feature @@ -0,0 +1,50 @@ +Feature: Tiger geocoding + Testing the forward and reverse Geocoding functions with tiger lines + + + @Tiger + Scenario: TIGER house number in Bismarck ND + Given the request parameters + | addressdetails + | 1 + When looking up coordinates 46.806715,-100.765655 + And exactly 1 result is returned + And result addresses contain + | ID | house_number | road | postcode | country_code + | 0 | 1746 | East Broadway Avenue | 58501 | us + And result 0 has not attributes osm_id,osm_type + + @Tiger + Scenario: No TIGER house number for zoom < 18 + Given the request parameters + | addressdetails | zoom + | 1 | 17 + When looking up coordinates 46.806715,-100.765655 + 
And exactly 1 result is returned + And result addresses contain + | ID | road | postcode | country_code + | 0 | East Broadway Avenue | 58501 | us + And result 0 has attributes osm_id,osm_type + + @Tiger + Scenario: TIGER house number + When sending json search query "2501 Harding Avenue, Bismarck" + Then result 0 has not attributes osm_id,osm_type + + @Tiger + Scenario: TIGER house number (road fallback) + When sending json search query "1 Harding Avenue, Bismarck" + Then result 0 has attributes osm_id,osm_type + + @Tiger + Scenario: TIGER accepted-language + Given the request parameters + | addressdetails | accept-language + | 1 | de + When looking up coordinates 46.806715,-100.765655 + And exactly 1 result is returned + And result addresses contain + | ID | house_number | road | postcode | country |country_code + | 0 | 1746 | East Broadway Avenue | 58501 | Vereinigte Staaten von Amerika | us + And result 0 has not attributes osm_id,osm_type + -- 2.39.5