From 2148d81474728c4d59100f8d281ca0dfbe5e7ac2 Mon Sep 17 00:00:00 2001 From: Brian Quinion Date: Fri, 18 Mar 2011 09:52:16 +0000 Subject: [PATCH] calculate search position based on 'importance' rather than address rank --- lib/lib.php | 6 ++- nominatim/import.c | 37 +++++++++++++------ sql/functions.sql | 11 +++++- sql/tables.sql | 9 +++++ website/details.php | 5 +++ website/reverse.php | 9 ++++- website/search.php | 90 +++++++++++++++++++++++++++++---------------- 7 files changed, 120 insertions(+), 47 deletions(-) diff --git a/lib/lib.php b/lib/lib.php index 8dc2570c..d36e6472 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -40,6 +40,9 @@ function byImportance($a, $b) { +/* + if ($a['importance'] != $b['importance']) + return ($a['importance'] > $b['importance']?-1:1); if ($a['aPointPolygon']['numfeatures'] != $b['aPointPolygon']['numfeatures']) return ($a['aPointPolygon']['numfeatures'] > $b['aPointPolygon']['numfeatures']?-1:1); if ($a['aPointPolygon']['area'] != $b['aPointPolygon']['area']) @@ -48,8 +51,7 @@ // return ($a['levenshtein'] < $b['levenshtein']?-1:1); if ($a['rank_search'] != $b['rank_search']) return ($a['rank_search'] < $b['rank_search']?-1:1); - if ($a['importance'] != $b['importance']) - return ($a['importance'] < $b['importance']?-1:1); +*/ return ($a['foundorder'] < $b['foundorder']?-1:1); } diff --git a/nominatim/import.c b/nominatim/import.c index 89851d2f..a171cdcb 100644 --- a/nominatim/import.c +++ b/nominatim/import.c @@ -140,6 +140,13 @@ void StartElement(xmlTextReaderPtr reader, const xmlChar *name) feature.rankSearch = xmlTextReaderGetAttribute(reader, BAD_CAST "importance"); feature.parentPlaceID = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_place_id"); +/* + if (strlen(feature.parentPlaceID) == 0) + { + xmlFree(feature.parentPlaceID); + feature.parentPlaceID = NULL; + } +*/ feature.parentType = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_type"); feature.parentID = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_id"); 
@@ -320,7 +327,7 @@ void StartElement(xmlTextReaderPtr reader, const xmlChar *name) void EndElement(xmlTextReaderPtr reader, const xmlChar *name) { PGresult * res; - const char * paramValues[11]; + const char * paramValues[14]; char * place_id; char * partionQueryName; int i, namePos, lineTypeLen, lineValueLen; @@ -438,6 +445,8 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name) } paramValues[5] = (const char *)featureNameString; + paramValues[6] = (const char *)feature.countryCode; + featureExtraTagString[0] = 0; if (featureExtraTagLines) { @@ -464,18 +473,21 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name) strcpy(featureExtraTagString+(namePos++), "\""); } } - paramValues[6] = (const char *)featureExtraTagString; + paramValues[7] = (const char *)featureExtraTagString; - paramValues[7] = (const char *)feature.parentPlaceID; + if (strlen(feature.parentPlaceID) == 0) + paramValues[8] = "0"; + else + paramValues[8] = (const char *)feature.parentPlaceID; - paramValues[8] = (const char *)feature.adminLevel; - paramValues[9] = (const char *)feature.houseNumber; - paramValues[10] = (const char *)feature.rankAddress; - paramValues[11] = (const char *)feature.rankSearch; - paramValues[12] = (const char *)feature.geometry; + paramValues[9] = (const char *)feature.adminLevel; + paramValues[10] = (const char *)feature.houseNumber; + paramValues[11] = (const char *)feature.rankAddress; + paramValues[12] = (const char *)feature.rankSearch; + paramValues[13] = (const char *)feature.geometry; if (strlen(paramValues[3])) { - res = PQexecPrepared(conn, "placex_insert", 13, paramValues, NULL, NULL, 0); + res = PQexecPrepared(conn, "placex_insert", 14, paramValues, NULL, NULL, 0); if (PQresultStatus(res) != PGRES_COMMAND_OK) { fprintf(stderr, "index_placex: INSERT failed: %s", PQerrorMessage(conn)); @@ -561,6 +573,9 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name) xmlFree(feature.value); xmlFree(feature.rankAddress); 
xmlFree(feature.rankSearch); + if (feature.parentPlaceID) xmlFree(feature.parentPlaceID); + if (feature.parentType) xmlFree(feature.parentType); + if (feature.parentID) xmlFree(feature.parentID); // if (feature.name) xmlFree(feature.name); if (feature.countryCode) xmlFree(feature.countryCode); if (feature.adminLevel) xmlFree(feature.adminLevel); @@ -704,8 +719,8 @@ int nominatim_import(const char *conninfo, const char *partionTagsFilename, cons } res = PQprepare(conn, "placex_insert", - "insert into placex (place_id,osm_type,osm_id,class,type,name,extratags,parent_place_id,admin_level,housenumber,rank_address,rank_search,geometry) " - "values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, st_setsrid($13, 4326))", + "insert into placex (place_id,osm_type,osm_id,class,type,name,country_code,extratags,parent_place_id,admin_level,housenumber,rank_address,rank_search,geometry) " + "values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, st_setsrid($14, 4326))", 12, NULL); if (PQresultStatus(res) != PGRES_COMMAND_OK) { diff --git a/sql/functions.sql b/sql/functions.sql index 4f5ff8a7..c6b76ad1 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1220,7 +1220,9 @@ BEGIN DELETE FROM place_boundingbox where place_id = NEW.place_id; result := deleteRoad(NEW.partition, NEW.place_id); result := deleteLocationArea(NEW.partition, NEW.place_id); - + + -- recalculate country and partition (should probably have a country_code and calculated_country_code as separate fields) + SELECT country_code from place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO NEW.country_code; NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code)); NEW.partition := get_partition(NEW.geometry, NEW.country_code); NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry); @@ -1589,6 +1591,12 @@ BEGIN -- RAISE WARNING 'delete: % % % %',OLD.osm_type,OLD.osm_id,OLD.class,OLD.type; + -- deleting large polygons 
can have a massive effect on the system - require manual intervention to let them through + IF st_area(OLD.geometry) > 2 THEN + insert into import_polygon_delete values (OLD.osm_type,OLD.osm_id,OLD.class,OLD.type); + RETURN NULL; + END IF; + -- mark for delete UPDATE placex set indexed_status = 100 where osm_type = OLD.osm_type and osm_id = OLD.osm_id and class = OLD.class and type = OLD.type; @@ -1665,6 +1673,7 @@ BEGIN END IF; DELETE from import_polygon_error where osm_type = NEW.osm_type and osm_id = NEW.osm_id; + DELETE from import_polygon_delete where osm_type = NEW.osm_type and osm_id = NEW.osm_id; -- To paraphrase, if there isn't an existing item, OR if the admin level has changed, OR if it is a major change in geometry IF existing.osm_type IS NULL diff --git a/sql/tables.sql b/sql/tables.sql index 35d6394a..1abf530a 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -299,3 +299,12 @@ CREATE TABLE import_polygon_error ( SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); + +drop table import_polygon_delete; +CREATE TABLE import_polygon_delete ( + osm_type char(1), + osm_id INTEGER, + class TEXT NOT NULL, + type TEXT NOT NULL + ); +CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id); diff --git a/website/details.php b/website/details.php index 3c4581d5..2e5a6c9f 100755 --- a/website/details.php +++ b/website/details.php @@ -84,6 +84,11 @@ { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); } + elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['outlinestring'],$aMatch)) + { + // TODO: this just takes the first ring + preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); + } elseif 
(preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['outlinestring'],$aMatch)) { $fRadius = 0.01; diff --git a/website/reverse.php b/website/reverse.php index 116cddd1..b0205d22 100755 --- a/website/reverse.php +++ b/website/reverse.php @@ -25,6 +25,10 @@ $sOutputFormat = $_GET['format']; } + // Show address breakdown + $bShowAddressDetails = true; + if (isset($_GET['addressdetails'])) $bShowAddressDetails = (bool)$_GET['addressdetails']; + // Prefered language $aLangPrefOrder = getPrefferedLangauges(); $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]"; @@ -143,7 +147,10 @@ $sSQL .= " from placex where place_id = $iPlaceID "; $aPlace = $oDB->getRow($sSQL); - $aAddress = getAddressDetails($oDB, $sLanguagePrefArraySQL, $iPlaceID, $aPlace['country_code']); + if ($bShowAddressDetails) + { + $aAddress = getAddressDetails($oDB, $sLanguagePrefArraySQL, $iPlaceID, $aPlace['country_code']); + } $aClassType = getClassTypes(); $sAddressType = ''; diff --git a/website/search.php b/website/search.php index d6007ef5..80a77dfc 100755 --- a/website/search.php +++ b/website/search.php @@ -37,7 +37,12 @@ // Prefered language $aLangPrefOrder = getPrefferedLangauges(); -// if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true; + if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true; + if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true; + if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true; + +$bReverseInPlan = true; + $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]"; if (isset($_GET['exclude_place_ids']) && $_GET['exclude_place_ids']) @@ -62,7 +67,11 @@ break; case 'city': $iMinAddressRank = 14; - $iMaxAddressRank = 18; + $iMaxAddressRank = 16; + break; + case 'settlement': + $iMinAddressRank = 8; + $iMaxAddressRank = 20; break; } } @@ -283,6 +292,7 @@ var_dump($sSQL, $aDatabaseWords); exit; } + $aPossibleMainWordIDs = array(); foreach($aDatabaseWords as 
$aToken) { if (isset($aValidTokens[$aToken['word_token']])) @@ -293,6 +303,7 @@ { $aValidTokens[$aToken['word_token']] = array($aToken); } + if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1; } if (CONST_Debug) var_Dump($aPhrases, $aValidTokens); @@ -572,18 +583,24 @@ if (CONST_Debug) var_Dump($aGroupedSearches); - if ($bReverseInPlan && false) + if ($bReverseInPlan) { - foreach($aGroupedSearches as $iGroup => $aSearches) + $aCopyGroupedSearches = $aGroupedSearches; + foreach($aCopyGroupedSearches as $iGroup => $aSearches) { foreach($aSearches as $iSearch => $aSearch) { if (sizeof($aSearch['aAddress'])) { - $aReverseSearch = $aSearch; $iReverseItem = array_pop($aSearch['aAddress']); - $aReverseSearch['aName'][$iReverseItem] = $iReverseItem; - $aGroupedSearches[$iGroup][] = $aReverseSearch; + if (isset($aPossibleMainWordIDs[$iReverseItem])) + { + $aSearch['aAddress'] = array_merge($aSearch['aAddress'], $aSearch['aName']); + $aSearch['aName'] = array($iReverseItem); + $aGroupedSearches[$iGroup][] = $aSearch; + } +// $aReverseSearch['aName'][$iReverseItem] = $iReverseItem; + // $aGroupedSearches[$iGroup][] = $aReverseSearch; } } } @@ -692,17 +709,16 @@ } if ($bBoundingBoxSearch) $aTerms[] = "centroid && $sViewboxSmallSQL"; if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) asc"; - if ($sViewboxSmallSQL) $aOrder[] = "ST_Contains($sViewboxSmallSQL, centroid) desc"; - if ($sViewboxLargeSQL) $aOrder[] = "ST_Contains($sViewboxLargeSQL, centroid) desc"; - $aOrder[] = "search_rank ASC"; + + $sImportanceSQL = 'case when importance = 0 OR importance IS NULL then 0.92-(search_rank::float/33) else importance end'; + + if ($sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END"; + if ($sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END"; + $aOrder[] = 
"$sImportanceSQL DESC"; if (sizeof($aTerms)) { $sSQL = "select place_id"; - if ($sViewboxSmallSQL) $sSQL .= ",ST_Contains($sViewboxSmallSQL, centroid) as in_small"; - else $sSQL .= ",false as in_small"; - if ($sViewboxLargeSQL) $sSQL .= ",ST_Contains($sViewboxLargeSQL, centroid) as in_large"; - else $sSQL .= ",false as in_large"; $sSQL .= " from search_name"; $sSQL .= " where ".join(' and ',$aTerms); $sSQL .= " order by ".join(', ',$aOrder); @@ -720,19 +736,21 @@ var_dump($sSQL, $aViewBoxPlaceIDs); exit; } - +//var_dump($aViewBoxPlaceIDs); // Did we have an viewbox matches? $aPlaceIDs = array(); $bViewBoxMatch = false; foreach($aViewBoxPlaceIDs as $aViewBoxRow) { - if ($bViewBoxMatch == 1 && $aViewBoxRow['in_small'] == 'f') break; - if ($bViewBoxMatch == 2 && $aViewBoxRow['in_large'] == 'f') break; - if ($aViewBoxRow['in_small'] == 't') $bViewBoxMatch = 1; - else if ($aViewBoxRow['in_large'] == 't') $bViewBoxMatch = 2; +// if ($bViewBoxMatch == 1 && $aViewBoxRow['in_small'] == 'f') break; +// if ($bViewBoxMatch == 2 && $aViewBoxRow['in_large'] == 'f') break; +// if ($aViewBoxRow['in_small'] == 't') $bViewBoxMatch = 1; +// else if ($aViewBoxRow['in_large'] == 't') $bViewBoxMatch = 2; $aPlaceIDs[] = $aViewBoxRow['place_id']; } } +//var_Dump($aPlaceIDs); +//exit; if ($aSearch['sHouseNumber'] && sizeof($aPlaceIDs)) { @@ -807,6 +825,9 @@ $aPlaceIDs = $oDB->getCol($sSQL); $sPlaceIDs = join(',',$aPlaceIDs); + if ($sPlaceIDs) + { + $fRange = 0.01; $sSQL = "select count(*) from pg_tables where tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'"; if ($oDB->getOne($sSQL)) @@ -830,7 +851,7 @@ { if (isset($aSearch['fRadius']) && $aSearch['fRadius']) $fRange = $aSearch['fRadius']; $sSQL = "select l.place_id from placex as l,placex as f where "; - $sSQL .= "f.place_id in ($sPlaceIDs) and ST_DWithin(l.geometry, st_centroid(f.geometry), $fRange) "; + $sSQL .= "f.place_id in ( $sPlaceIDs) and ST_DWithin(l.geometry, st_centroid(f.geometry), $fRange) "; $sSQL 
.= "and l.class='".$aSearch['sClass']."' and l.type='".$aSearch['sType']."' "; if (sizeof($aExcludePlaceIDs)) { @@ -842,6 +863,7 @@ if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = $oDB->getCol($sSQL); } + } } } @@ -884,10 +906,11 @@ $sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,"; $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,"; $sSQL .= "avg(ST_X(ST_Centroid(geometry))) as lon,avg(ST_Y(ST_Centroid(geometry))) as lat, "; - $sSQL .= $sOrderSQL." as porder "; + $sSQL .= $sOrderSQL." as porder, "; + $sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance "; $sSQL .= "from placex where place_id in ($sPlaceIDs) "; $sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank "; - $sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code"; + $sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance"; if (!$bDeDupe) $sSQL .= ",place_id"; $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; $sSQL .= ",get_name_by_language(name, $sLanguagePrefArraySQL) "; @@ -898,24 +921,27 @@ $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; - $sSQL .= $sOrderSQL." as porder "; + $sSQL .= $sOrderSQL." 
as porder, "; + $sSQL .= "-0.15 as importance "; $sSQL .= "from location_property_tiger where place_id in ($sPlaceIDs) "; $sSQL .= "and 30 between $iMinAddressRank and $iMaxAddressRank "; $sSQL .= "group by place_id"; if (!$bDeDupe) $sSQL .= ",place_id"; $sSQL .= " union "; - $sSQL .= "select 'T' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id,'us' as country_code,"; + $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id,'us' as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; - $sSQL .= $sOrderSQL." as porder "; + $sSQL .= $sOrderSQL." as porder, "; + $sSQL .= "-0.15 as importance "; $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; $sSQL .= "and 30 between $iMinAddressRank and $iMaxAddressRank "; $sSQL .= "group by place_id"; if (!$bDeDupe) $sSQL .= ",place_id"; $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; - $sSQL .= "order by rank_search,rank_address,porder asc"; + $sSQL .= "order by porder asc"; +// $sSQL .= "order by rank_search,rank_address,porder asc"; if (CONST_Debug) var_dump('
',$sSQL); $aSearchResults = $oDB->getAll($sSQL); //var_dump($sSQL,$aSearchResults);exit; @@ -934,9 +960,9 @@ { $sSearchResult = 'No Results Found'; } - +//var_Dump($aSearchResults); +//exit; $aClassType = getClassTypesWithImportance(); - foreach($aSearchResults as $iResNum => $aResult) { if (CONST_Search_AreaPolygons || true) @@ -1049,7 +1075,7 @@ } //if (CONST_Debug) var_dump($aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']); - +/* if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance']) && $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance']) { @@ -1064,15 +1090,15 @@ { $aResult['importance'] = 1000000000000000; } - +*/ $aResult['name'] = $aResult['langaddress']; $aResult['foundorder'] = $iResNum; $aSearchResults[$iResNum] = $aResult; } - -//var_dump($aSearchResults);exit; uasort($aSearchResults, 'byImportance'); + +//var_dump($aSearchResults);exit; $aOSMIDDone = array(); $aClassTypeNameDone = array(); -- 2.39.5