From: Sarah Hoffmann Date: Wed, 8 Aug 2012 20:34:06 +0000 (+0200) Subject: Merge branch 'master' of http://github.com/twain47/Nominatim X-Git-Tag: deploy~687 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/932d945b7f897fd18be7d4c2fac3eda3915aa296?ds=sidebyside;hp=-c Merge branch 'master' of http://github.com/twain47/Nominatim Conflicts: lib/lib.php lib/log.php lib/template/address-jsonv2.php settings/settings.php sql/functions.sql sql/indices.src.sql sql/tables.sql utils/setup.php utils/specialphrases.php website/reverse.php --- 932d945b7f897fd18be7d4c2fac3eda3915aa296 diff --combined lib/log.php index 7515cbbd,5b847a41..d81ef46d --- a/lib/log.php +++ b/lib/log.php @@@ -5,6 -5,9 +5,9 @@@ $aStartTime = explode('.',microtime(true)); if (!$aStartTime[1]) $aStartTime[1] = '0'; + $sOutputFormat = ''; + if (isset($_GET['format'])) $sOutputFormat = $_GET['format']; + $hLog = array( date('Y-m-d H:i:s',$aStartTime[0]).'.'.$aStartTime[1], $_SERVER["REMOTE_ADDR"], @@@ -14,13 -17,20 +17,14 @@@ if (CONST_Log_DB) { - // Log - if ($sType == 'search') - { - $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')'); - } - - $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)'; + $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)'; $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]); - $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')'; + $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($_GET['format']).','.getDBQuoted($hLog[3]).')'; $oDB->query($sSQL); } - if (CONST_Log_File && CONST_Log_File_ReverseLog != '') { + if (CONST_Log_File && CONST_Log_File_ReverseLog != '') + { if ($sType == 'reverse') { $aStartTime = explode('.',$hLog[0]); @@@ -33,7 -43,7 +37,7 @@@ $_GET['lon'].','. $_GET['zoom'].','. '"'.addslashes($_SERVER['HTTP_USER_AGENT']).'",'. - '"'.addslashes($_GET['format']).'"'."\n", + '"'.addslashes($sOutputFormat).'"'."\n", FILE_APPEND); } } @@@ -43,12 -53,18 +47,12 @@@ function logEnd(&$oDB, $hLog, $iNumResults) { - $aEndTime = explode('.',microtime(true)); - if (!$aEndTime[1]) $aEndTime[1] = '0'; - $sEndTime = date('Y-m-d H:i:s',$aEndTime[0]).'.'.$aEndTime[1]; + $aEndTime = explode('.',microtime(true)); + if (!$aEndTime[1]) $aEndTime[1] = '0'; + $sEndTime = date('Y-m-d H:i:s',$aEndTime[0]).'.'.$aEndTime[1]; if (CONST_Log_DB) { - $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; - $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); - $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); - $sSQL .= ' and query = '.getDBQuoted($hLog[3]); - $oDB->query($sSQL); - $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); @@@ -56,7 -72,8 +60,8 @@@ $oDB->query($sSQL); } - if (CONST_Log_File && CONST_Log_File_SearchLog != '') { + if (CONST_Log_File && CONST_Log_File_SearchLog != '') + { $aStartTime = explode('.',$hLog[0]); file_put_contents(CONST_Log_File_SearchLog, $aStartTime[0].','.$aStartTime[1].','. @@@ -65,7 -82,7 +70,7 @@@ '"'.addslashes($hLog[1]).'",'. '"'.addslashes($hLog[3]).'",'. '"'.addslashes($_SERVER['HTTP_USER_AGENT']).'",'. - '"'.addslashes($_GET['format']).'",'. + '"'.addslashes((isset($_GET['format']))?$_GET['format']:'').'",'. $iNumResults."\n", FILE_APPEND); } diff --combined settings/settings.php index ef2ebe4e,4ebc199d..df965211 --- a/settings/settings.php +++ b/settings/settings.php @@@ -7,28 -7,28 +7,30 @@@ @define('CONST_Database_DSN', 'pgsql://@/nominatim'); // Paths - @define('CONST_Postgresql_Version', '9.1'); + @define('CONST_Postgresql_Version', '9.0'); @define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/'.CONST_Postgresql_Version.'/contrib'); @define('CONST_Path_Postgresql_Postgis', CONST_Path_Postgresql_Contrib.'/postgis-1.5'); - @define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/../osm2pgsql/osm2pgsql'); - @define('CONST_Osmosis_Binary', CONST_BasePath.'/../osmosis-0.40.1/bin/osmosis'); + @define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql'); - @define('CONST_Osmosis_Binary', CONST_BasePath.'/osmosis-0.40.1/bin/osmosis'); ++ @define('CONST_Osmosis_Binary', CONST_BasePath.'/../osmosis-0.40.1/bin/osmosis'); // Website settings - @define('CONST_ClosedForIndexing', false); - @define('CONST_ClosedForIndexingExceptionIPs', ''); @define('CONST_BlockedIPs', ''); + @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks'); + @define('CONST_WhitelistedIPs', ''); + @define('CONST_BlockedUserAgents', ''); + @define('CONST_BlockReverseMaxLoad', 15); + @define('CONST_BulkUserIPs', ''); - @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/'); + @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/'); @define('CONST_Tile_Default', 'Mapnik'); - @define('CONST_Default_Language', 'xx'); + @define('CONST_Default_Language', 'en'); @define('CONST_Default_Lat', 20.0); @define('CONST_Default_Lon', 0.0); @define('CONST_Default_Zoom', 2); @define('CONST_Search_AreaPolygons_Enabled', true); + @define('CONST_Search_AreaPolygons', true); @define('CONST_Suggestions_Enabled', false); diff --combined sql/indices.src.sql index c586dd28,5e6ccd12..2e5dde71 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@@ -1,28 -1,30 +1,30 @@@ + -- Indices used only during search and update. + -- These indices are created only after the indexing process is done. + -CREATE INDEX idx_word_word_id on word USING BTREE (word_id); +CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd; - CREATE INDEX search_name_name_vector_idx ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; - CREATE INDEX searchnameplacesearch_search_nameaddress_vector_idx ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; -CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid); ++CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; ++CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd; -CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id); +CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd; -CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id); -CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline); +CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd; DROP INDEX IF EXISTS idx_placex_rank_search; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search); -CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address); -CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0; -CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses'; -CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id); +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd; +CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0; +CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL; +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses'; - CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd; - +CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid); -CREATE INDEX idx_search_name_country_nameaddress_vector ON search_name_country USING GIN (nameaddress_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd; +CREATE INDEX idx_search_name_country_nameaddress_vector ON search_name_country USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; -- start - CREATE INDEX idx_search_name_-partition-_nameaddress_vector ON search_name_-partition- USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; -CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid); +CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd; -- end + + CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type); diff --combined sql/partitions.src.sql index 9c078980,324f35bb..3fb7962c --- a/sql/partitions.src.sql +++ b/sql/partitions.src.sql @@@ -13,22 -13,33 +13,33 @@@ create type nearfeature as isguess boolean ); + drop type nearfeaturecentr cascade; + create type nearfeaturecentr as ( + place_id BIGINT, + keywords int[], + rank_address integer, + rank_search integer, + distance float, + isguess boolean, + centroid GEOMETRY + ); + CREATE TABLE location_area_country () INHERITS (location_area_large); -CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry); +CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_country () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id); -CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; -- start CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large); -CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id); -CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry); +CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_-partition- () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id); -CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid); -CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; CREATE TABLE location_property_-partition- () INHERITS (location_property); CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id); @@@ -41,26 -52,26 +52,26 @@@ CREATE TABLE location_road_-partition- country_code VARCHAR(2) ); SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry); -CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id); +CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd; -- end - create or replace function getNearFeatures(in_partition INTEGER, point GEOMETRY, maxrank INTEGER, isin_tokens INT[]) RETURNS setof nearfeature AS $$ + create or replace function getNearFeatures(in_partition INTEGER, point GEOMETRY, maxrank INTEGER, isin_tokens INT[]) RETURNS setof nearfeaturecentr AS $$ DECLARE - r nearfeature%rowtype; + r nearfeaturecentr%rowtype; BEGIN -- start IF in_partition = -partition- THEN FOR r IN - SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(point, centroid)) as distance, isguess FROM ( + SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(point, centroid)) as distance, isguess, centroid FROM ( SELECT * FROM location_area_large_-partition- WHERE ST_Contains(geometry, point) and rank_search < maxrank UNION ALL SELECT * FROM location_area_country WHERE ST_Contains(geometry, point) and rank_search < maxrank ) as location_area GROUP BY place_id, keywords, rank_address, rank_search, isguess, centroid - ORDER BY rank_address desc, isin_tokens && keywords desc, isguess asc, + ORDER BY rank_address, isin_tokens && keywords desc, isguess asc, ST_Distance(point, centroid) * CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2 -- capital city diff --combined sql/tables.sql index e56af7f6,d5cf7bcd..a0ad66a8 --- a/sql/tables.sql +++ b/sql/tables.sql @@@ -23,6 -23,18 +23,6 @@@ CREATE TABLE import_npi_log event text ); ---drop table IF EXISTS query_log; -CREATE TABLE query_log ( - starttime timestamp, - query text, - ipaddress text, - endtime timestamp, - results integer - ); -CREATE INDEX idx_query_log ON query_log USING BTREE (starttime); -GRANT INSERT ON query_log TO "www-data" ; -GRANT UPDATE ON query_log TO "www-data" ; - CREATE TABLE new_query_log ( type text, starttime timestamp, @@@ -30,7 -42,6 +30,7 @@@ useragent text, language text, query text, + searchterm text, endtime timestamp, results integer, format text, @@@ -41,6 -52,9 +41,6 @@@ GRANT INSERT ON new_query_log TO "www-d GRANT UPDATE ON new_query_log TO "www-data" ; GRANT SELECT ON new_query_log TO "www-data" ; -create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as -useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC; - --drop table IF EXISTS report_log; CREATE TABLE report_log ( starttime timestamp, @@@ -62,9 -76,9 +62,9 @@@ CREATE TABLE word country_code varchar(2), search_name_count INTEGER, operator TEXT - ); + ) TABLESPACE ssd; SELECT AddGeometryColumn('word', 'location', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_word_word_token on word USING BTREE (word_token); +CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd; --CREATE INDEX idx_word_trigram ON word USING gin(word_trigram gin_trgm_ops) WITH (fastupdate = off); GRANT SELECT ON word TO "www-data" ; DROP SEQUENCE seq_word; @@@ -122,8 -136,8 +122,8 @@@ CREATE TABLE search_name_blank SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); drop table IF EXISTS search_name; -CREATE TABLE search_name () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); +CREATE TABLE search_name () INHERITS (search_name_blank) TABLESPACE ssd; +CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( @@@ -133,8 -147,8 +133,8 @@@ isaddress boolean, distance float, cached_rank_address integer - ); -CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id); + ) TABLESPACE data; +CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( @@@ -176,18 -190,7 +176,7 @@@ drop table placex CREATE TABLE placex ( place_id BIGINT NOT NULL, partition integer, - osm_type char(1), - osm_id INTEGER, - class TEXT NOT NULL, - type TEXT NOT NULL, - name HSTORE, - admin_level INTEGER, - housenumber TEXT, - street TEXT, - isin TEXT, - postcode TEXT, - country_code varchar(2), - extratags HSTORE, + LIKE place INCLUDING CONSTRAINTS, parent_place_id BIGINT, linked_place_id BIGINT, rank_address INTEGER, @@@ -195,13 -198,17 +184,17 @@@ importance FLOAT, indexed_status INTEGER, indexed_date TIMESTAMP, - geometry_sector INTEGER + wikipedia TEXT, -- calculated wikipedia article name (language:title) + geometry_sector INTEGER, + calculated_country_code varchar(2) - ); + ) TABLESPACE ssd; - SELECT AddGeometryColumn('placex', 'geometry', 4326, 'GEOMETRY', 2); + SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); -CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id); -CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id); -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id); -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector); -CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry); -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26; +CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd; ++CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd; +CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd; ++CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26; --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); @@@ -249,10 -256,10 +242,10 @@@ update placex set geometry_sector = geo drop index idx_placex_pendingbylatlon; drop index idx_placex_interpolation; drop index idx_placex_sector; -CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(geometry_sector,indexed,name),rank_search) +CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(geometry_sector,indexed,name),rank_search) TABLESPACE ssd where geometry_index(geometry_sector,indexed,name) IS NOT NULL; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed = false and class='place' and type='houses'; -CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id); +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed = false and class='place' and type='houses'; +CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd; DROP SEQUENCE seq_postcodes; CREATE SEQUENCE seq_postcodes start 1; @@@ -270,7 -277,7 +263,7 @@@ CREATE TABLE import_polygon_error ); SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd; drop table import_polygon_delete; CREATE TABLE import_polygon_delete ( @@@ -279,7 -286,32 +272,32 @@@ class TEXT NOT NULL, type TEXT NOT NULL ); -CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd; drop sequence file; CREATE SEQUENCE file start 1; + + -- null table so it won't error + -- deliberately no drop - importing the table is expensive and static, if it is already there better to avoid removing it + CREATE TABLE wikipedia_article ( + language text NOT NULL, + title text NOT NULL, + langcount integer, + othercount integer, + totalcount integer, + lat double precision, + lon double precision, + importance double precision, + osm_type character(1), + osm_id bigint + ); + ALTER TABLE ONLY wikipedia_article ADD CONSTRAINT wikipedia_article_pkey PRIMARY KEY (language, title); + CREATE INDEX idx_wikipedia_article_osm_id ON wikipedia_article USING btree (osm_type, osm_id); + + CREATE TABLE wikipedia_redirect ( + language text, + from_title text, + to_title text + ); + ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMARY KEY (language, from_title); + diff --combined utils/setup.php index 871f6e04,c842ba33..663a9694 --- a/utils/setup.php +++ b/utils/setup.php @@@ -18,17 -18,23 +18,23 @@@ array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'), array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'), array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'), + array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'), array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'), + array('enable-diff-updates', '', 0, 1, 0, 0, 'bool', 'Turn on the code required to make diff updates work'), + array('enable-debug-statements', '', 0, 1, 0, 0, 'bool', 'Include debug warning statements in pgsql commands'), array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'), array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'), array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'), + array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'), array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'), + array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'), array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'), array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'), array('create-roads', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'), array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'), array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Generate default osmosis configuration'), array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'), + array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'), array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'), array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'), array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'), @@@ -37,7 -43,7 +43,7 @@@ $bDidSomething = false; - // This is a pretty hard core defult - the number of processors in the box - 1 + // This is a pretty hard core default - the number of processors in the box - 1 $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1); if ($iInstances < 1) { @@@ -49,6 -55,15 +55,15 @@@ $iInstances = getProcessorCount(); echo "WARNING: resetting threads to $iInstances\n"; } + + // Assume we can steal all the cache memory in the box (unless told otherwise) + $iCacheMemory = (isset($aCMDResult['osm2pgsql-cache'])?$aCMDResult['osm2pgsql-cache']:getCacheMemoryMB()); + if ($iCacheMemory > getTotalMemoryMB()) + { + $iCacheMemory = getCacheMemoryMB(); + echo "WARNING: resetting cache memory to $iCacheMemory\n"; + } + if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date'])) { $sBaseFile = basename($aCMDResult['osm-file']); @@@ -71,7 -86,7 +86,7 @@@ { fail('database already exists ('.CONST_Database_DSN.')'); } - passthru('createdb '.$aDSNInfo['database']); + passthru('createdb -E UTF-8 '.$aDSNInfo['database']); } if ($aCMDResult['create-db'] || $aCMDResult['all']) @@@ -82,7 -97,7 +97,7 @@@ $oDB =& getDB(); passthru('createlang plpgsql '.$aDSNInfo['database']); - $pgver = (float) CONST_Postgresql_Version; + $pgver = (float) CONST_Postgresql_Version; if ($pgver < 9.1) { pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql'); } else { @@@ -106,12 -121,22 +121,23 @@@ $bDidSomething = true; $osm2pgsql = CONST_Osm2pgsql_Binary; - if (!file_exists($osm2pgsql)) fail("please download and build osm2pgsql"); - passthru($osm2pgsql.' --tablespace-slim-index data --tablespace-main-index ssd --tablespace-main-data data --tablespace-slim-data data -lsc -O gazetteer -C 12000 --hstore -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']); + if (!file_exists($osm2pgsql)) + { + echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n"; + fail("osm2pgsql not found in '$osm2pgsql'"); + } ++ $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data ssd'; + $osm2pgsql .= ' -lsc -O gazetteer --hstore'; - $osm2pgsql .= ' -C '.$iCacheMemory; ++ $osm2pgsql .= ' -C 16000'; + $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']; + passthruCheckReturn($osm2pgsql); $oDB =& getDB(); $x = $oDB->getRow('select * from place limit 1'); - if (!$x || PEAR::isError($x)) fail('No Data'); + if (PEAR::isError($x)) { + fail($x->getMessage()); + } + if (!$x) fail('No Data'); } if ($aCMDResult['create-functions'] || $aCMDResult['all']) @@@ -120,7 -145,9 +146,9 @@@ $bDidSomething = true; if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built"); $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql'); - $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate); + $sTemplate = str_replace('{modulepath}', CONST_BasePath.'/module', $sTemplate); + if ($aCMDResult['enable-diff-updates']) $sTemplate = str_replace('RETURN NEW; -- @DIFFUPDATES@', '--', $sTemplate); + if ($aCMDResult['enable-debug-statements']) $sTemplate = str_replace('--DEBUG:', '', $sTemplate); pgsqlRunScript($sTemplate); } @@@ -194,6 -221,34 +222,34 @@@ pgsqlRunScript($sTemplate); } + if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) + { + $bDidSomething = true; + $sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin'; + $sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin'; + if (file_exists($sWikiArticlesFile)) + { + echo "Importing wikipedia articles..."; + pgsqlRunDropAndRestore($sWikiArticlesFile); + echo "...done\n"; + } + else + { + echo "WARNING: wikipedia article dump file not found - places will have default importance\n"; + } + if (file_exists($sWikiRedirectsFile)) + { + echo "Importing wikipedia redirects..."; + pgsqlRunDropAndRestore($sWikiRedirectsFile); + echo "...done\n"; + } + else + { + echo "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n"; + } + } + + if ($aCMDResult['load-data'] || $aCMDResult['all']) { echo "Load Data\n"; @@@ -219,6 -274,30 +275,30 @@@ if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection)); echo '.'; + $sSQL = 'select partition from country_name order by country_code'; + $aPartitions = $oDB->getCol($sSQL); + if (PEAR::isError($aPartitions)) + { + fail($aPartitions->getMessage()); + } + $aPartitions[] = 0; + foreach($aPartitions as $sPartition) + { + if (!pg_query($oDB->connection, 'TRUNCATE location_road_'.$sPartition)) fail(pg_last_error($oDB->connection)); + echo '.'; + } + + // pre-create the word list + if (!$aCMDResult['disable-token-precalc']) + { + if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection)); + echo '.'; + if (!pg_query($oDB->connection, 'select count(make_keywords(v)) from (select distinct postcode as v from place) as w where v is not null;')) fail(pg_last_error($oDB->connection)); + echo '.'; + if (!pg_query($oDB->connection, 'select count(getorcreate_housenumber_id(v)) from (select distinct housenumber as v from place where housenumber is not null) as w;')) fail(pg_last_error($oDB->connection)); + echo '.'; + } + $aDBInstances = array(); for($i = 0; $i < $iInstances; $i++) { @@@ -313,7 -392,7 +393,7 @@@ } fclose($hFile); - + $bAnyBusy = true; while($bAnyBusy) { @@@ -346,20 -425,30 +426,30 @@@ if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } - if ($aCMDResult['osmosis-init'] && isset($aCMDResult['osmosis-init-date'])) + if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date'])) { $bDidSomething = true; + $oDB =& getDB(); if (!file_exists(CONST_Osmosis_Binary)) fail("please download osmosis"); if (file_exists(CONST_BasePath.'/settings/configuration.txt')) echo "settings/configuration.txt already exists\n"; else passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_BasePath.'/settings'); $sDate = $aCMDResult['osmosis-init-date']; - $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$sDate; + $aDate = date_parse_from_format("Y-m-d\TH-i", $sDate); + $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'; + $sURL .= 'Y='.$aDate['year'].'&m='.$aDate['month'].'&d='.$aDate['day']; + $sURL .= '&H='.$aDate['hour'].'&i='.$aDate['minute'].'&s=0'; + $sURL .= '&stream=minute'; echo "Getting state file: $sURL\n"; $sStateFile = file_get_contents($sURL); if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file"); file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile); + echo "Updating DB status\n"; + pg_query($oDB->connection, 'TRUNCATE import_status'); + $sSQL = "INSERT INTO import_status VALUES('".$sDate."')"; + pg_query($oDB->connection, $sSQL); + } if ($aCMDResult['index'] || $aCMDResult['all']) @@@ -368,11 -457,11 +458,11 @@@ $sOutputFile = ''; if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output']; $sBaseCmd = CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$iInstances.$sOutputFile; - passthru($sBaseCmd.' -R 4'); - pgsqlRunScript('ANALYSE'); - passthru($sBaseCmd.' -r 5 -R 25'); - pgsqlRunScript('ANALYSE'); - passthru($sBaseCmd.' -r 26'); + passthruCheckReturn($sBaseCmd.' -R 4'); + if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE'); + passthruCheckReturn($sBaseCmd.' -r 5 -R 25'); + if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE'); + passthruCheckReturn($sBaseCmd.' -r 26'); } if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) @@@ -407,7 -496,12 +497,12 @@@ { $bDidSomething = true; $sTargetDir = $aCMDResult['create-website']; - if (!is_dir($sTargetDir)) fail('please specify a directory to setup'); + if (!is_dir($sTargetDir)) + { + echo "You must create the website directory before calling this function.\n"; + fail("Target directory does not exist."); + } + @symlink(CONST_BasePath.'/website/details.php', $sTargetDir.'/details.php'); @symlink(CONST_BasePath.'/website/reverse.php', $sTargetDir.'/reverse.php'); @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/search.php'); @@@ -426,9 -520,10 +521,10 @@@ { if (!file_exists($sFilename)) fail('unable to find '.$sFilename); - // Convert database DSN to psql paramaters + // Convert database DSN to psql parameters $aDSNInfo = DB::parseDSN(CONST_Database_DSN); - $sCMD = 'psql -f '.$sFilename.' '.$aDSNInfo['database']; + if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; + $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -f '.$sFilename; $aDescriptors = array( 0 => array('pipe', 'r'), @@@ -453,10 -548,10 +549,10 @@@ function pgsqlRunScript($sScript) { - // Convert database DSN to psql paramaters + // Convert database DSN to psql parameters $aDSNInfo = DB::parseDSN(CONST_Database_DSN); if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; - $sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database']; + $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database']; $aDescriptors = array( 0 => array('pipe', 'r'), 1 => STDOUT, @@@ -474,3 -569,66 +570,66 @@@ fclose($ahPipes[0]); proc_close($hProcess); } + + function pgsqlRunRestoreData($sDumpFile) + { + // Convert database DSN to psql parameters + $aDSNInfo = DB::parseDSN(CONST_Database_DSN); + if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; + $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc -a '.$sDumpFile; + + $aDescriptors = array( + 0 => array('pipe', 'r'), + 1 => array('pipe', 'w'), + 2 => array('file', '/dev/null', 'a') + ); + $ahPipes = null; + $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes); + if (!is_resource($hProcess)) fail('unable to start pg_restore'); + + fclose($ahPipes[0]); + + // TODO: error checking + while(!feof($ahPipes[1])) + { + echo fread($ahPipes[1], 4096); + } + fclose($ahPipes[1]); + + proc_close($hProcess); + } + + function pgsqlRunDropAndRestore($sDumpFile) + { + // Convert database DSN to psql parameters + $aDSNInfo = DB::parseDSN(CONST_Database_DSN); + if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; + $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc --clean '.$sDumpFile; + + $aDescriptors = array( + 0 => array('pipe', 'r'), + 1 => array('pipe', 'w'), + 2 => array('file', '/dev/null', 'a') + ); + $ahPipes = null; + $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes); + if (!is_resource($hProcess)) fail('unable to start pg_restore'); + + fclose($ahPipes[0]); + + // TODO: error checking + while(!feof($ahPipes[1])) + { + echo fread($ahPipes[1], 4096); + } + fclose($ahPipes[1]); + + proc_close($hProcess); + } + + function passthruCheckReturn($cmd) + { + $result = -1; + passthru($cmd, $result); + if ($result != 0) fail('Error executing external command: '.$cmd); + } diff --combined utils/specialphrases.php index 550f0c90,4381bdfa..94aa74d8 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php @@@ -10,11 -10,13 +10,13 @@@ array('help', 'h', 0, 1, 0, 0, false, 'Show Help'), array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), - array('countries', '', 0, 1, 0, 0, 'bool', 'Create import script for coutry codes and names'), + array('countries', '', 0, 1, 0, 0, 'bool', 'Create import script for country codes and names'), array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '), ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true); + include(CONST_BasePath.'/settings/phrase_settings.php'); + if ($aCMDResult['countries']) { echo "select getorcreate_country(make_standard_name('uk'), 'gb');\n"; @@@ -30,7 -32,6 +32,6 @@@ if ($aCMDResult['wiki-import']) { - include(CONST_BasePath.'/settings/phrase_settings.php'); $aPairs = array(); foreach($aLanguageIn as $sLanguage) @@@ -85,20 -86,22 +86,23 @@@ foreach($aPairs as $aPair) { + if ($aPair[0] == 'yes') continue; + if ($aPair[1] == 'yes') continue; + if ($aPair[0] == 'highway') continue; if ($aPair[1] == 'highway') continue; + echo "drop table if exists place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1]).";\n"; echo "create table place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." as "; echo "select place_id as place_id,st_centroid(geometry) as centroid from placex where "; echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n"; - echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";"; + echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n"; } diff --combined website/reverse.php index bbad4a44,c155fa57..810be3c4 --- a/website/reverse.php +++ b/website/reverse.php @@@ -2,22 -2,6 +2,22 @@@ require_once(dirname(dirname(__FILE__)).'/lib/init-website.php'); require_once(CONST_BasePath.'/lib/log.php'); + if (preg_match(CONST_BlockedUserAgents, $_SERVER["HTTP_USER_AGENT"]) > 0) + { + $fLoadAvg = getLoadAverage(); + if ($fLoadAvg >= CONST_BlockReverseMaxLoad) { + header('HTTP/1.0 403 Forbidden'); + header('Content-type: text/html; charset=utf-8'); + echo "

App temporarily blocked

"; + echo "Your application has been temporarily blocked from the OpenStreetMap Nominatim "; + echo "geolocation service due to high server load."; + echo "\n\n"; + exit; + } + + } + + if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false) { $fLoadAvg = getLoadAverage(); @@@ -44,8 -28,8 +44,8 @@@ $bShowAddressDetails = true; if (isset($_GET['addressdetails'])) $bShowAddressDetails = (bool)$_GET['addressdetails']; - // Prefered language - $aLangPrefOrder = getPrefferedLangauges(); + // Preferred language + $aLangPrefOrder = getPreferredLanguages(); $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]"; $hLog = logStart($oDB, 'reverse', $_SERVER['QUERY_STRING'], $aLangPrefOrder); @@@ -85,7 -69,7 +85,7 @@@ 18 => 30, // or >, Building 19 => 30, // or >, Building ); - $iMaxRank = isset($aZoomRank[$_GET['zoom']])?$aZoomRank[$_GET['zoom']]:28; + $iMaxRank = (isset($_GET['zoom']) && isset($aZoomRank[$_GET['zoom']]))?$aZoomRank[$_GET['zoom']]:28; // Find the nearest point $fSearchDiam = 0.0001; @@@ -117,12 -101,12 +117,12 @@@ $sSQL .= ' ORDER BY ST_distance('.$sPointSQL.', geometry) ASC limit 1'; //var_dump($sSQL); $aPlace = $oDB->getRow($sSQL); - $iPlaceID = $aPlace['place_id']; - $iParentPlaceID = $aPlace['parent_place_id']; - if (PEAR::IsError($iPlaceID)) + if (PEAR::IsError($aPlace)) { failInternalError("Could not determine closest place.", $sSQL, $iPlaceID); } + $iPlaceID = $aPlace['place_id']; + $iParentPlaceID = $aPlace['parent_place_id']; } // The point we found might be too small - use the address to find what it is a child of @@@ -160,13 -144,20 +160,20 @@@ $aAddress = getAddressDetails($oDB, $sLanguagePrefArraySQL, $iPlaceID, $aPlace['country_code']); } $aClassType = getClassTypes(); - $sAddressType = ''; - if (isset($aClassType[$aPlace['class'].':'.$aPlace['type'].':'.$aPlace['admin_level']])) - $sAddressType = $aClassType[$aPlace['class'].':'.$aPlace['type'].':'.$aPlace['admin_level']]['simplelabel']; - elseif (isset($aClassType[$aPlace['class'].':'.$aPlace['type']])) - $sAddressType = $aClassType[$aPlace['class'].':'.$aPlace['type']]['simplelabel']; - else $sAddressType = $aPlace['class']; - $aPlace['addresstype'] = $sAddressType; + $sAddressType = ''; + $sClassType = $aPlace['class'].':'.$aPlace['type'].':'.$aPlace['admin_level']; + if (isset($aClassType[$sClassType]) && isset($aClassType[$sClassType]['simplelabel'])) + { + $sAddressType = $aClassType[$aClassType]['simplelabel']; + } + else + { + $sClassType = $aPlace['class'].':'.$aPlace['type']; + if (isset($aClassType[$sClassType]) && isset($aClassType[$sClassType]['simplelabel'])) + $sAddressType = $aClassType[$sClassType]['simplelabel']; + else $sAddressType = $aPlace['class']; + } + $aPlace['addresstype'] = $sAddressType; } include(CONST_BasePath.'/lib/template/address-'.$sOutputFormat.'.php'); diff --combined website/search.php index 53314d51,5e2ff371..cc73513e --- a/website/search.php +++ b/website/search.php @@@ -36,8 -36,8 +36,8 @@@ // Show address breakdown $bShowAddressDetails = isset($_GET['addressdetails']) && $_GET['addressdetails']; - // Prefered language - $aLangPrefOrder = getPrefferedLangauges(); + // Preferred language + $aLangPrefOrder = getPreferredLanguages(); if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true; if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true; if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true; @@@ -53,7 -53,7 +53,7 @@@ } } - // Only certain ranks of feature + // Only certain ranks of feature if (isset($_GET['featureType']) && !isset($_GET['featuretype'])) $_GET['featuretype'] = $_GET['featureType']; if (isset($_GET['featuretype'])) @@@ -92,11 -92,11 +92,11 @@@ // Search query $sQuery = (isset($_GET['q'])?trim($_GET['q']):''); - if (!$sQuery && $_SERVER['PATH_INFO'] && $_SERVER['PATH_INFO'][0] == '/') + if (!$sQuery && isset($_SERVER['PATH_INFO']) && $_SERVER['PATH_INFO'][0] == '/') { $sQuery = substr($_SERVER['PATH_INFO'], 1); - // reverse order of '/' seperated string + // reverse order of '/' separated string $aPhrases = explode('/', $sQuery); $aPhrases = array_reverse($aPhrases); $sQuery = join(', ',$aPhrases); @@@ -244,6 -244,7 +244,7 @@@ $sToken = $oDB->getOne("select make_standard_name('".$aSpecialTerm[1]."') as string"); $sSQL = 'select * from (select word_id,word_token, word, class, type, location, country_code, operator'; $sSQL .= ' from word where word_token in (\' '.$sToken.'\')) as x where (class is not null and class not in (\'place\',\'highway\')) or country_code is not null'; + if (CONST_Debug) var_Dump($sSQL); $aSearchWords = $oDB->getAll($sSQL); $aNewSearches = array(); foreach($aSearches as $aSearch) @@@ -309,12 -310,7 +310,12 @@@ // Check which tokens we have, get the ID numbers $sSQL = 'select word_id,word_token, word, class, type, location, country_code, operator'; $sSQL .= ' from word where word_token in ('.join(',',array_map("getDBQuoted",$aTokens)).')'; - $sSQL .= ' and (class is null or class not in (\'highway\'))'; + // HACK WARNING + // (mis)using search_name_count to exclude words that return too many + // search results. saerch_name_count is currently set to 1 by hand + // because there is no fast way to extract this count from a live database. + $sSQL .= ' and search_name_count = 0'; +// $sSQL .= ' and (class is null or class not in (\'highway\'))'; // $sSQL .= ' group by word_token, word, class, type, location, country_code'; if (CONST_Debug) var_Dump($sSQL); @@@ -375,7 -371,8 +376,8 @@@ // Try and calculate GB postcodes we might be missing foreach($aTokens as $sToken) { - if (!isset($aValidTokens[$sToken]) && !isset($aValidTokens[' '.$sToken]) && preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) + // Source of gb postcodes is now definitive - always use + if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) { if (substr($aData[1],-2,1) != ' ') { @@@ -417,7 -414,6 +419,6 @@@ Score how good the search is so they can be ordered */ - foreach($aPhrases as $iPhrase => $sPhrase) { $aNewPhraseSearches = array(); @@@ -450,12 -446,12 +451,12 @@@ if ($aSearch['sCountryCode'] === false) { $aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']); - // Country is almost always at the end of the string - increase score for finding it anywhere else (opimisation) + // Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation) if ($iWordset+1 != sizeof($aPhrases[$iPhrase]['wordsets']) || $iPhrase+1 != sizeof($aPhrases)) $aSearch['iSearchRank'] += 5; if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; } } - elseif ($aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) + elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) { if ($aSearch['fLat'] === '') { @@@ -504,7 -500,7 +505,7 @@@ if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; } } - else + elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { if (sizeof($aSearch['aName'])) { @@@ -531,6 -527,8 +532,8 @@@ // Allow searching for a word - but at extra cost foreach($aValidTokens[$sToken] as $aSearchTerm) { + if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) + { //var_Dump('
',$aSearch['aName']); if (sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4) @@@ -550,6 -548,7 +553,7 @@@ $aSearch['iNamePhrase'] = $iPhrase; if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; } + } } } else @@@ -746,6 -745,9 +750,9 @@@ // First we need a position, either aName or fLat or both $aTerms = array(); $aOrder = array(); + + // TODO: filter out the pointless search terms (2 letter name tokens and less) + // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]"; if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; @@@ -787,16 -789,11 +794,16 @@@ $sSQL .= " limit ".$iLimit; if (CONST_Debug) var_dump($sSQL); + $iStartTime = time(); $aViewBoxPlaceIDs = $oDB->getAll($sSQL); if (PEAR::IsError($aViewBoxPlaceIDs)) { failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs); } + if (time() - $iStartTime > 60) { + file_put_contents(CONST_BasePath.'/log/long_queries.log', date('Y-m-d H:i:s', $iStartTime).' '.$sSQL."\n", FILE_APPEND); + } + //var_dump($aViewBoxPlaceIDs); // Did we have an viewbox matches? $aPlaceIDs = array(); @@@ -1014,6 -1011,7 +1021,7 @@@ $sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance "; $sSQL .= "from placex where place_id in ($sPlaceIDs) "; $sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank "; + $sSQL .= "and linked_place_id is null "; $sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance"; if (!$bDeDupe) $sSQL .= ",place_id"; $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; @@@ -1134,20 -1132,26 +1142,26 @@@ //var_Dump($aSearchResults); //exit; $aClassType = getClassTypesWithImportance(); + $aRecheckWords = preg_split('/\b/',$sQuery); + foreach($aRecheckWords as $i => $sWord) + { + if (!$sWord) unset($aRecheckWords[$i]); + } foreach($aSearchResults as $iResNum => $aResult) { - if (CONST_Search_AreaPolygons || true) + if (CONST_Search_AreaPolygons) { // Get the bounding box and outline polygon $sSQL = "select place_id,numfeatures,area,outline,"; - $sSQL .= "ST_Y(ST_PointN(ExteriorRing(ST_Box2D(outline)),4)) as minlat,ST_Y(ST_PointN(ExteriorRing(ST_Box2D(outline)),2)) as maxlat,"; - $sSQL .= "ST_X(ST_PointN(ExteriorRing(ST_Box2D(outline)),1)) as minlon,ST_X(ST_PointN(ExteriorRing(ST_Box2D(outline)),3)) as maxlon,"; + $sSQL .= "ST_Y(ST_PointN(ST_ExteriorRing(Box2D(outline)),4)) as minlat,ST_Y(ST_PointN(ST_ExteriorRing(Box2D(outline)),2)) as maxlat,"; + $sSQL .= "ST_X(ST_PointN(ST_ExteriorRing(Box2D(outline)),1)) as minlon,ST_X(ST_PointN(ST_ExteriorRing(Box2D(outline)),3)) as maxlon,"; $sSQL .= "ST_AsText(outline) as outlinestring from get_place_boundingbox_quick(".$aResult['place_id'].")"; $sSQL = "select place_id,0 as numfeatures,st_area(geometry) as area,"; - $sSQL .= "ST_Y(ST_PointN(ExteriorRing(ST_Box2D(geometry)),4)) as minlat,ST_Y(ST_PointN(ExteriorRing(ST_Box2D(geometry)),2)) as maxlat,"; - $sSQL .= "ST_X(ST_PointN(ExteriorRing(ST_Box2D(geometry)),1)) as minlon,ST_X(ST_PointN(ExteriorRing(ST_Box2D(geometry)),3)) as maxlon,"; - $sSQL .= "ST_AsText(geometry) as outlinestring from placex where place_id = ".$aResult['place_id'].' and st_geometrytype(ST_Box2D(geometry)) = \'ST_Polygon\''; + $sSQL .= "ST_Y(centroid) as centrelat,ST_X(centroid) as centrelon,"; + $sSQL .= "ST_Y(ST_PointN(ST_ExteriorRing(Box2D(geometry)),4)) as minlat,ST_Y(ST_PointN(ST_ExteriorRing(Box2D(geometry)),2)) as maxlat,"; + $sSQL .= "ST_X(ST_PointN(ST_ExteriorRing(Box2D(geometry)),1)) as minlon,ST_X(ST_PointN(ST_ExteriorRing(Box2D(geometry)),3)) as maxlon,"; + $sSQL .= "ST_AsText(geometry) as outlinestring from placex where place_id = ".$aResult['place_id'].' and st_geometrytype(Box2D(geometry)) = \'ST_Polygon\''; $aPointPolygon = $oDB->getRow($sSQL); if (PEAR::IsError($aPointPolygon)) { @@@ -1155,6 -1159,10 +1169,10 @@@ } if ($aPointPolygon['place_id']) { + if ($aPointPolygon['centrelon'] !== null && $aPointPolygon['centrelat'] !== null ) { + $aResult['lat'] = $aPointPolygon['centrelat']; + $aResult['lon'] = $aPointPolygon['centrelon']; + } // Translate geometary string to point array if (preg_match('#POLYGON\\(\\(([- 0-9.,]+)#',$aPointPolygon['outlinestring'],$aMatch)) { @@@ -1250,6 -1258,16 +1268,16 @@@ //exit; } + // Adjust importance for the number of exact string matches in the result + $aResult['importance'] = max(0.001,$aResult['importance']); + $iCountWords = 0; + $sAddress = $aResult['langaddress']; + foreach($aRecheckWords as $i => $sWord) + { + if (stripos($sAddress, $sWord)!==false) $iCountWords++; + } + $aResult['importance'] = $aResult['importance'] + $iCountWords; + //if (CONST_Debug) var_dump($aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']); /* if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance']) @@@ -1271,7 -1289,6 +1299,6 @@@ $aResult['foundorder'] = $iResNum; $aSearchResults[$iResNum] = $aResult; } - uasort($aSearchResults, 'byImportance'); //var_dump($aSearchResults);exit; @@@ -1294,10 -1311,10 +1321,10 @@@ $bFirst = false; } if (!$bDeDupe || (!isset($aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']]) - && !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['osm_class'].$aResult['name']]))) + && !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['name']]))) { $aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']] = true; - $aClassTypeNameDone[$aResult['osm_type'].$aResult['osm_class'].$aResult['name']] = true; + $aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['name']] = true; $aSearchResults[] = $aResult; }