From e6d983474bc4f1720b2ef4460041b423b02fc172 Mon Sep 17 00:00:00 2001 From: Brian Quinion Date: Tue, 26 Oct 2010 15:22:41 +0000 Subject: [PATCH] data partitioning --- data/country_name.sql | 2 + nominatim/export.c | 20 +- nominatim/index.c | 38 ++- nominatim/nominatim.c | 5 + settings/settings.php | 4 + sql/functions.sql | 558 ++++++++++------------------------------- sql/loaddata.sql | 6 +- sql/partitions.src.sql | 48 ++-- sql/tables.sql | 69 ++--- utils/setup.php | 34 ++- 10 files changed, 273 insertions(+), 511 deletions(-) diff --git a/data/country_name.sql b/data/country_name.sql index 6bb18bd8..ab244b48 100644 --- a/data/country_name.sql +++ b/data/country_name.sql @@ -248,3 +248,5 @@ pf "name"=>"Polynésie française", "name:af"=>"Franse Polynesië", "name:an"=>" \. CREATE INDEX idx_country_name_country_code ON country_name USING btree (country_code); + +insert into country_name values('im','name'=>'Isle of Man','en'); diff --git a/nominatim/export.c b/nominatim/export.c index 422b44a1..dbd3c9b8 100644 --- a/nominatim/export.c +++ b/nominatim/export.c @@ -151,21 +151,33 @@ void nominatim_exportCreatePreparedQueries(PGconn * conn) res = PQprepare(conn, "placex_details", "select osm_type, osm_id, class, type, name, housenumber, country_code, ST_AsText(geometry), admin_level, rank_address, rank_search from placex where place_id = $1", 1, pg_prepare_params); - if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "Error preparing placex_details: %s", PQerrorMessage(conn)); + exit(EXIT_FAILURE); + } PQclear(res); pg_prepare_params[0] = PG_OID_INT8; res = PQprepare(conn, "placex_address", "select osm_type,osm_id,class,type,distance,cached_rank_address from place_addressline join placex on (address_place_id = placex.place_id) where isaddress and place_addressline.place_id = $1 and address_place_id != place_addressline.place_id order by cached_rank_address asc", 1, pg_prepare_params); - if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "Error preparing placex_address: %s", PQerrorMessage(conn)); + exit(EXIT_FAILURE); + } PQclear(res); pg_prepare_params[0] = PG_OID_INT8; res = PQprepare(conn, "placex_names", - "select (each(name)).key,(each(name)).value from (select keyvalueToHStore(name) as name from placex where place_id = $1) as x", + "select (each(name)).key,(each(name)).value from (select name as name from placex where place_id = $1) as x", 1, pg_prepare_params); - if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "Error preparing placex_names: %s", PQerrorMessage(conn)); + exit(EXIT_FAILURE); + } PQclear(res); } diff --git a/nominatim/index.c b/nominatim/index.c index e1c21a52..1ae414c0 100644 --- a/nominatim/index.c +++ b/nominatim/index.c @@ -51,7 +51,7 @@ void nominatim_index(int rank_min, int rank_max, int num_threads, const char *co Oid pg_prepare_params[2]; - conn = PQconnectdb(conninfo); + conn = PQconnectdb(conninfo); if (PQstatus(conn) != CONNECTION_OK) { fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn)); exit(EXIT_FAILURE); @@ -59,17 +59,25 @@ void nominatim_index(int rank_min, int rank_max, int num_threads, const char *co pg_prepare_params[0] = PG_OID_INT4; res = PQprepare(conn, "index_sectors", - "select geometry_sector,count(*) from placex where rank_search = $1 and indexed = false and name is not null group by geometry_sector order by geometry_sector", + "select geometry_sector,count(*) from placex where rank_search = $1 and indexed_status > 0 group by geometry_sector order by geometry_sector", 1, pg_prepare_params); - if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "Failed preparing index_sectors: %s\n", PQerrorMessage(conn)); + exit(EXIT_FAILURE); + } PQclear(res); pg_prepare_params[0] = PG_OID_INT4; pg_prepare_params[1] = PG_OID_INT4; res = PQprepare(conn, "index_sector_places", - "select place_id from placex where rank_search = $1 and geometry_index(geometry,indexed,name) = $2", + "select place_id from placex where rank_search = $1 and geometry_sector = $2 and indexed_status > 0", 2, pg_prepare_params); - if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "Failed preparing index_sector_places: %s\n", PQerrorMessage(conn)); + exit(EXIT_FAILURE); + } PQclear(res); // Build the data for each thread @@ -82,11 +90,15 @@ void nominatim_index(int rank_min, int rank_max, int num_threads, const char *co exit(EXIT_FAILURE); } - pg_prepare_params[0] = PG_OID_INT8; + pg_prepare_params[0] = PG_OID_INT4; res = PQprepare(thread_data[i].conn, "index_placex", - "update placex set indexed = true where place_id = $1", + "update placex set indexed_status = 0 where place_id = $1", 1, pg_prepare_params); - if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "Failed preparing index_placex: %s\n", PQerrorMessage(conn)); + exit(EXIT_FAILURE); + } PQclear(res); nominatim_exportCreatePreparedQueries(thread_data[i].conn); @@ -159,7 +171,7 @@ void nominatim_index(int rank_min, int rank_max, int num_threads, const char *co PQclear(resPlaces); exit(EXIT_FAILURE); } - if (PQftype(resPlaces, 0) != PG_OID_INT8) + if (PQftype(resPlaces, 0) != PG_OID_INT4) { fprintf(stderr, "Place_id value has unexpected type\n"); PQclear(resPlaces); @@ -236,8 +248,8 @@ void *nominatim_indexThread(void * thread_data_in) const char *paramValues[1]; int paramLengths[1]; int paramFormats[1]; - uint64_t paramPlaceID; - uint64_t place_id; + uint32_t paramPlaceID; + uint32_t place_id; while(1) { @@ -248,13 +260,13 @@ void *nominatim_indexThread(void * thread_data_in) break; } - place_id = PGint64(*((uint64_t *)PQgetvalue(thread_data->res, *thread_data->count, 0))); + place_id = PGint32(*((uint32_t *)PQgetvalue(thread_data->res, *thread_data->count, 0))); (*thread_data->count)++; pthread_mutex_unlock( thread_data->count_mutex ); //printf(" Processing place_id %ld\n", place_id); - paramPlaceID = PGint64(place_id); + paramPlaceID = PGint32(place_id); paramValues[0] = (char *)¶mPlaceID; paramLengths[0] = sizeof(paramPlaceID); paramFormats[0] = 1; diff --git a/nominatim/nominatim.c b/nominatim/nominatim.c index c9942539..72604bb6 100644 --- a/nominatim/nominatim.c +++ b/nominatim/nominatim.c @@ -202,6 +202,11 @@ int main(int argc, char *argv[]) } PQfinish(conn); + if (!index && !export && !import) + { + fprintf(stderr, "Please select index, export or import.\n"); + exit(EXIT_FAILURE); + } if (index) nominatim_index(0, 30, threads, conninfo, file); if (export) nominatim_export(0, 30, conninfo, file); if (import) nominatim_import(conninfo, tagsfile, file); diff --git a/settings/settings.php b/settings/settings.php index 84cfa3e5..f8075cb1 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -4,6 +4,10 @@ @define('CONST_Debug', false); @define('CONST_Database_DSN', 'pgsql://@/nominatim'); + // Paths + @define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/9.0/contrib'); + @define('CONST_Path_Postgresql_Postgis', CONST_Path_Postgresql_Contrib.'/postgis-1.5'); + // Website settings @define('CONST_ClosedForIndexing', false); @define('CONST_ClosedForIndexingExceptionIPs', ''); diff --git a/sql/functions.sql b/sql/functions.sql index 75ab4797..8776e8a7 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -85,7 +85,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; -CREATE OR REPLACE FUNCTION geometry_index(place geometry,indexed BOOLEAN,name keyvalue[]) RETURNS INTEGER +CREATE OR REPLACE FUNCTION geometry_index(place geometry, indexed BOOLEAN, name HSTORE) RETURNS INTEGER AS $$ BEGIN IF indexed THEN RETURN NULL; END IF; @@ -95,7 +95,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; -CREATE OR REPLACE FUNCTION geometry_index(sector integer, indexed BOOLEAN, name keyvalue[]) RETURNS INTEGER +CREATE OR REPLACE FUNCTION geometry_index(sector integer, indexed BOOLEAN, name HSTORE) RETURNS INTEGER AS $$ BEGIN IF indexed THEN RETURN NULL; END IF; @@ -331,37 +331,6 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; -CREATE OR REPLACE FUNCTION add_keywords(a keyvalue[], b keyvalue[]) RETURNS keyvalue[] - AS $$ -DECLARE - i INTEGER; - j INTEGER; - f BOOLEAN; - r keyvalue[]; -BEGIN - IF array_upper(a, 1) IS NULL THEN - RETURN b; - END IF; - IF array_upper(b, 1) IS NULL THEN - RETURN a; - END IF; - r := a; - FOR i IN 1..array_upper(b, 1) LOOP - f := false; - FOR j IN 1..array_upper(a, 1) LOOP - IF (a[j].key = b[i].key) THEN - f := true; - END IF; - END LOOP; - IF NOT f THEN - r := r || b[i]; - END IF; - END LOOP; - RETURN r; -END; -$$ -LANGUAGE plpgsql IMMUTABLE; - CREATE OR REPLACE FUNCTION make_keywords(src HSTORE) RETURNS INTEGER[] AS $$ DECLARE @@ -493,30 +462,40 @@ DECLARE BEGIN place_centre := ST_Centroid(place); +--RAISE WARNING 'start: %', ST_AsText(place_centre); + -- Try for a OSM polygon first - FOR nearcountry IN select country_code from location_area where st_contains(area, place_centre) limit 1 + FOR nearcountry IN select country_code from location_area_country where country_code is not null and st_contains(geometry, place_centre) limit 1 LOOP RETURN nearcountry.country_code; END LOOP; - -- Try for an OSM polygon first, grid is faster +--RAISE WARNING 'osm fallback: %', ST_AsText(place_centre); + + -- Try for OSM fallback data FOR nearcountry IN select country_code from country_osm_grid where st_contains(geometry, place_centre) limit 1 LOOP RETURN nearcountry.country_code; END LOOP; +--RAISE WARNING 'natural earth: %', ST_AsText(place_centre); + -- Natural earth data (first fallback) -- FOR nearcountry IN select country_code from country_naturalearthdata where st_contains(geometry, place_centre) limit 1 -- LOOP -- RETURN nearcountry.country_code; -- END LOOP; +--RAISE WARNING 'in country: %', ST_AsText(place_centre); + -- WorldBoundaries data (second fallback - think there might be something broken in this data) FOR nearcountry IN select country_code from country where st_contains(geometry, place_centre) limit 1 LOOP RETURN nearcountry.country_code; END LOOP; +--RAISE WARNING 'near country: %', ST_AsText(place_centre); + -- Still not in a country - try nearest within ~12 miles of a country FOR nearcountry IN select country_code from country where st_distance(geometry, place_centre) < 0.5 order by st_distance(geometry, place) limit 1 @@ -543,7 +522,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; -CREATE OR REPLACE FUNCTION delete_location(OLD_place_id BIGINT) RETURNS BOOLEAN +CREATE OR REPLACE FUNCTION delete_location(OLD_place_id INTEGER) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -555,9 +534,9 @@ $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION add_location( - place_id BIGINT, + place_id INTEGER, place_country_code varchar(2), - name keyvalue[], + name hstore, rank_search INTEGER, rank_address INTEGER, geometry GEOMETRY @@ -567,6 +546,7 @@ CREATE OR REPLACE FUNCTION add_location( DECLARE keywords INTEGER[]; country_code VARCHAR(2); + partition VARCHAR(10); locationid INTEGER; isarea BOOLEAN; xmin INTEGER; @@ -575,8 +555,10 @@ DECLARE ymax INTEGER; lon INTEGER; lat INTEGER; + centroid GEOMETRY; secgeo GEOMETRY; diameter FLOAT; + x BOOLEAN; BEGIN -- Allocate all tokens ids - prevents multi-processor race condition later on at cost of slowing down import @@ -588,11 +570,16 @@ BEGIN country_code := get_country_code(geometry); END IF; country_code := lower(place_country_code); + partition := country_code; + IF partition is null THEN + partition := 'none'; + END IF; isarea := false; IF (ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(geometry)) THEN isArea := true; + centroid := ST_Centroid(geometry); xmin := floor(st_xmin(geometry)); xmax := ceil(st_xmax(geometry)); @@ -600,23 +587,19 @@ BEGIN ymax := ceil(st_ymax(geometry)); IF xmin = xmax OR ymin = ymax OR (xmax-xmin < 2 AND ymax-ymin < 2) THEN - INSERT INTO location_area values (place_id, country_code, name, keywords, - rank_search, rank_address, false, ST_Centroid(geometry), geometry); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, centroid, geometry); ELSE FOR lon IN xmin..(xmax-1) LOOP FOR lat IN ymin..(ymax-1) LOOP secgeo := st_intersection(geometry, ST_SetSRID(ST_MakeBox2D(ST_Point(lon,lat),ST_Point(lon+1,lat+1)),4326)); IF NOT ST_IsEmpty(secgeo) AND ST_GeometryType(secgeo) in ('ST_Polygon','ST_MultiPolygon') THEN - INSERT INTO location_area values (place_id, country_code, name, keywords, - rank_search, rank_address, false, ST_Centroid(geometry), - st_intersection(geometry, ST_SetSRID(ST_MakeBox2D(ST_Point(lon,lat),ST_Point(lon+1,lat+1)),4326)) - ); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, centroid, secgeo); END IF; END LOOP; END LOOP; END IF; - ELSE + ELSEIF rank_search < 26 THEN diameter := 0.02; IF rank_search = 14 THEN @@ -629,77 +612,25 @@ BEGIN diameter := 0.05; ELSEIF rank_search = 25 THEN diameter := 0.005; - ELSEIF rank_search = 26 THEN - diameter := 0.001; END IF; secgeo := ST_Buffer(geometry, diameter); - INSERT INTO location_area values (place_id, country_code, name, keywords, - rank_search, rank_address, true, ST_Centroid(geometry), secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, ST_Centroid(geometry), secgeo); - END IF; + ELSE - INSERT INTO location_point values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); + -- ~ 20meters + secgeo := ST_Buffer(geometry, 0.0002); + x := insertLocationAreaRoadNear(partition, place_id, country_code, keywords, rank_search, rank_address, false, ST_Centroid(geometry), secgeo); - RETURN true; + -- ~ 100meters + secgeo := ST_Buffer(geometry, 0.001); + x := insertLocationAreaRoadFar(partition, place_id, country_code, keywords, rank_search, rank_address, false, ST_Centroid(geometry), secgeo); + + END IF; - IF not isarea THEN - IF rank_search < 26 THEN - INSERT INTO location_point_26 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 25 THEN - INSERT INTO location_point_25 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 24 THEN - INSERT INTO location_point_24 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 23 THEN - INSERT INTO location_point_23 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 22 THEN - INSERT INTO location_point_22 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 21 THEN - INSERT INTO location_point_21 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 20 THEN - INSERT INTO location_point_20 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 19 THEN - INSERT INTO location_point_19 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 18 THEN - INSERT INTO location_point_18 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 17 THEN - INSERT INTO location_point_17 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 16 THEN - INSERT INTO location_point_16 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 15 THEN - INSERT INTO location_point_15 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 14 THEN - INSERT INTO location_point_14 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 13 THEN - INSERT INTO location_point_13 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 12 THEN - INSERT INTO location_point_12 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 11 THEN - INSERT INTO location_point_11 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 10 THEN - INSERT INTO location_point_10 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 9 THEN - INSERT INTO location_point_9 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 8 THEN - INSERT INTO location_point_8 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 7 THEN - INSERT INTO location_point_7 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 6 THEN - INSERT INTO location_point_6 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 5 THEN - INSERT INTO location_point_5 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 4 THEN - INSERT INTO location_point_4 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 3 THEN - INSERT INTO location_point_3 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 2 THEN - INSERT INTO location_point_2 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - IF rank_search < 1 THEN - INSERT INTO location_point_1 values (place_id,country_code,name,keywords,rank_search,rank_address,isarea,ST_Centroid(geometry)); - END IF;END IF;END IF;END IF;END IF;END IF;END IF;END IF;END IF;END IF; - END IF;END IF;END IF;END IF;END IF;END IF;END IF;END IF;END IF;END IF; - END IF;END IF;END IF;END IF;END IF;END IF;END IF; RETURN true; + END IF; RETURN false; @@ -708,9 +639,9 @@ $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION update_location( - place_id BIGINT, + place_id INTEGER, place_country_code varchar(2), - name keyvalue[], + name hstore, rank_search INTEGER, rank_address INTEGER, geometry GEOMETRY @@ -726,7 +657,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION search_name_add_words(parent_place_id BIGINT, to_add INTEGER[]) +CREATE OR REPLACE FUNCTION search_name_add_words(parent_place_id INTEGER, to_add INTEGER[]) RETURNS BOOLEAN AS $$ DECLARE @@ -757,7 +688,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION update_location_nameonly(OLD_place_id BIGINT, name keyvalue[]) RETURNS BOOLEAN +CREATE OR REPLACE FUNCTION update_location_nameonly(OLD_place_id INTEGER, name hstore) RETURNS BOOLEAN AS $$ DECLARE newkeywords INTEGER[]; @@ -784,42 +715,13 @@ BEGIN END IF; UPDATE location_area set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_0 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_1 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_2 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_3 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_4 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_5 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_6 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_7 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_8 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_9 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_10 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_11 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_12 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_13 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_14 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_15 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_16 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_17 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_18 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_19 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_20 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_21 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_22 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_23 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_24 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_25 set keywords = newkeywords where place_id = OLD_place_id; - UPDATE location_point_26 set keywords = newkeywords where place_id = OLD_place_id; - RETURN search_name_add_words(OLD_place_id, addedkeywords); END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION create_interpolation(wayid BIGINT, interpolationtype TEXT) RETURNS INTEGER +CREATE OR REPLACE FUNCTION create_interpolation(wayid INTEGER, interpolationtype TEXT) RETURNS INTEGER AS $$ DECLARE @@ -851,10 +753,10 @@ BEGIN FOR nodeidpos in 1..array_upper(waynodes, 1) LOOP - select min(place_id) from placex where osm_type = 'N' and osm_id = waynodes[nodeidpos]::bigint and type = 'house' INTO search_place_id; + select min(place_id) from placex where osm_type = 'N' and osm_id = waynodes[nodeidpos]::INTEGER and type = 'house' INTO search_place_id; IF search_place_id IS NULL THEN -- null record of right type - select * from placex where osm_type = 'N' and osm_id = waynodes[nodeidpos]::bigint and type = 'house' limit 1 INTO nextnode; + select * from placex where osm_type = 'N' and osm_id = waynodes[nodeidpos]::INTEGER and type = 'house' limit 1 INTO nextnode; select ST_SetSRID(ST_Point(lon::float/10000000,lat::float/10000000),4326) from planet_osm_nodes where id = waynodes[nodeidpos] INTO nextnode.geometry; ELSE select * from placex where place_id = search_place_id INTO nextnode; @@ -909,7 +811,10 @@ BEGIN FOR housenum IN startnumber..endnumber BY stepsize LOOP -- this should really copy postcodes but it puts a huge burdon on the system for no big benefit -- ideally postcodes should move up to the way - insert into placex values (null,'N',prevnode.osm_id,prevnode.class,prevnode.type,NULL,prevnode.admin_level,housenum,prevnode.street,prevnode.isin,null,prevnode.country_code,prevnode.street_place_id,prevnode.rank_address,prevnode.rank_search,false,ST_Line_Interpolate_Point(linegeo, (housenum::float-orginalstartnumber::float)/originalnumberrange::float)); + insert into placex (osm_type, osm_id, class, type, admin_level, housenumber, street, isin, + country_code, street_place_id, rank_address, rank_search, indexed_status, geometry) + values ('N',prevnode.osm_id, prevnode.class, prevnode.type, prevnode.admin_level, housenum, prevnode.street, prevnode.isin, + prevnode.country_code, prevnode.street_place_id, prevnode.rank_address, prevnode.rank_search, 1, ST_Line_Interpolate_Point(linegeo, (housenum::float-orginalstartnumber::float)/originalnumberrange::float)); newpoints := newpoints + 1; --RAISE WARNING 'interpolation number % % ',prevnode.place_id,housenum; END LOOP; @@ -947,7 +852,6 @@ DECLARE country_code VARCHAR(2); diameter FLOAT; BEGIN --- RAISE WARNING '%',NEW.osm_id; -- RAISE WARNING '%',NEW.osm_id; -- just block these @@ -958,7 +862,6 @@ BEGIN RETURN null; END IF; --- RAISE WARNING '%',NEW.osm_id; IF ST_IsEmpty(NEW.geometry) OR NOT ST_IsValid(NEW.geometry) OR ST_X(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') THEN -- block all invalid geometary - just not worth the risk. seg faults are causing serious problems. RETURN NULL; @@ -976,8 +879,11 @@ BEGIN END IF; NEW.place_id := nextval('seq_place'); - NEW.indexed := false; + NEW.indexed_status := 1; NEW.country_code := lower(NEW.country_code); + IF NEW.country_code is null THEN + NEW.country_code := get_country_code(NEW.geometry); + END IF; NEW.geometry_sector := geometry_sector(NEW.geometry); IF NEW.admin_level > 15 THEN @@ -1051,31 +957,26 @@ BEGIN -- Postcode processing is very country dependant IF NEW.country_code IS NULL THEN - NEW.country_code := get_country_code(NEW.geometry); END IF; - NEW.name := ARRAY[ROW('ref',NEW.postcode)::keyvalue]; + NEW.name := 'ref'=>NEW.postcode; IF NEW.country_code = 'gb' THEN IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN NEW.rank_search := 25; NEW.rank_address := 5; - NEW.name := ARRAY[ROW('ref',NEW.postcode)::keyvalue]; ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN NEW.rank_search := 23; NEW.rank_address := 5; - NEW.name := ARRAY[ROW('ref',NEW.postcode)::keyvalue]; ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN NEW.rank_search := 21; NEW.rank_address := 5; - NEW.name := ARRAY[ROW('ref',NEW.postcode)::keyvalue]; END IF; ELSEIF NEW.country_code = 'de' THEN IF NEW.postcode ~ '^([0-9]{5})$' THEN - NEW.name := ARRAY[ROW('ref',NEW.postcode)::keyvalue]; NEW.rank_search := 21; NEW.rank_address := 11; END IF; @@ -1083,7 +984,6 @@ BEGIN ELSE -- Guess at the postcode format and coverage (!) IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local - NEW.name := ARRAY[ROW('ref',NEW.postcode)::keyvalue]; NEW.rank_search := 21; NEW.rank_address := 11; ELSE @@ -1091,15 +991,9 @@ BEGIN postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); IF postcode IS NOT NULL THEN - - -- TODO: insert new line into location instead - --result := add_location(NEW.place_id,NEW.country_code,ARRAY[ROW('ref',postcode)::keyvalue],21,11,NEW.geometry); - - NEW.name := ARRAY[ROW('ref',NEW.postcode)::keyvalue]; NEW.rank_search := 25; NEW.rank_address := 11; ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN - NEW.name := ARRAY[ROW('ref',NEW.postcode)::keyvalue]; NEW.rank_search := 21; NEW.rank_address := 11; END IF; @@ -1119,9 +1013,6 @@ BEGIN END IF; ELSEIF NEW.class = 'boundary' THEN - IF NEW.country_code is null THEN - NEW.country_code := get_country_code(NEW.geometry); - END IF; NEW.rank_search := NEW.admin_level * 2; NEW.rank_address := NEW.rank_search; ELSEIF NEW.class = 'landuse' AND ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') THEN @@ -1171,11 +1062,11 @@ BEGIN -- RETURN NULL; -- END IF; - IF array_upper(NEW.name, 1) is not null THEN + IF NEW.name is not null THEN result := add_location(NEW.place_id,NEW.country_code,NEW.name,NEW.rank_search,NEW.rank_address,NEW.geometry); END IF; - --RETURN NEW; + RETURN NEW; -- The following is not needed until doing diff updates, and slows the main index process down IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN @@ -1233,14 +1124,12 @@ TRIGGER DECLARE place_centroid GEOMETRY; - place_geometry_text TEXT; search_maxdistance FLOAT[]; search_mindistance FLOAT[]; address_havelevel BOOLEAN[]; -- search_scores wordscore[]; -- search_scores_pos INTEGER; - search_country_code_conflict BOOLEAN; i INTEGER; iMax FLOAT; @@ -1253,18 +1142,20 @@ DECLARE address_street_word_id INTEGER; street_place_id_count INTEGER; isin TEXT[]; - tagpairid INTEGER; + isin_tokens INT[]; + + location_rank_search INTEGER; + location_distance FLOAT; - bPointCountryCode BOOLEAN; + tagpairid INTEGER; name_vector INTEGER[]; nameaddress_vector INTEGER[]; result BOOLEAN; - BEGIN --- RAISE WARNING '%',NEW.place_id; +--RAISE WARNING '%',NEW.place_id; --RAISE WARNING '%', NEW; IF NEW.class = 'place' AND NEW.type = 'postcodearea' THEN @@ -1272,91 +1163,41 @@ BEGIN RETURN NEW; END IF; + IF NEW.country_code is null THEN + NEW.country_code := get_country_code(NEW.geometry); + END IF; NEW.country_code := lower(NEW.country_code); + NEW.partition := NEW.country_code; + IF NEW.partition is null THEN + NEW.partition := 'none'; + END IF; + + IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN - IF NEW.indexed and NOT OLD.indexed THEN + NEW.indexed_date = now(); IF NEW.class = 'place' AND NEW.type = 'houses' THEN i := create_interpolation(NEW.osm_id, NEW.housenumber); RETURN NEW; END IF; ---RAISE WARNING 'PROCESSING: % %', NEW.place_id, NEW.name; - - search_country_code_conflict := false; - DELETE FROM search_name WHERE place_id = NEW.place_id; ---RAISE WARNING 'x1'; DELETE FROM place_addressline WHERE place_id = NEW.place_id; ---RAISE WARNING 'x2'; DELETE FROM place_boundingbox where place_id = NEW.place_id; -- Adding ourselves to the list simplifies address calculations later INSERT INTO place_addressline VALUES (NEW.place_id, NEW.place_id, true, true, 0, NEW.rank_address); ---RAISE WARNING 'x3'; -- What level are we searching from search_maxrank := NEW.rank_search; - -- Default max/min distances to look for a location - FOR i IN 1..28 LOOP - search_maxdistance[i] := 1; - search_mindistance[i] := 0.0; - address_havelevel[i] := false; - END LOOP; - -- Minimum size to search, can be larger but don't let it shink below this - search_mindistance[14] := 0.2; - search_mindistance[15] := 0.1; - search_mindistance[16] := 0.05; - search_mindistance[17] := 0.03; - search_mindistance[18] := 0.015; - search_mindistance[19] := 0.008; - search_mindistance[20] := 0.006; - search_mindistance[21] := 0.004; - search_mindistance[22] := 0.003; - search_mindistance[23] := 0.002; - search_mindistance[24] := 0.002; - search_mindistance[25] := 0.001; - search_mindistance[26] := 0.001; - - search_maxdistance[14] := 1; - search_maxdistance[15] := 0.5; - search_maxdistance[16] := 0.15; - search_maxdistance[17] := 0.05; - search_maxdistance[18] := 0.02; - search_maxdistance[19] := 0.02; - search_maxdistance[20] := 0.02; - search_maxdistance[21] := 0.02; - search_maxdistance[22] := 0.02; - search_maxdistance[23] := 0.02; - search_maxdistance[24] := 0.02; - search_maxdistance[25] := 0.02; - search_maxdistance[26] := 0.02; - -- Speed up searches - just use the centroid of the feature -- cheaper but less acurate place_centroid := ST_Centroid(NEW.geometry); - place_geometry_text := 'ST_GeomFromText('''||ST_AsText(NEW.geometry)||''','||ST_SRID(NEW.geometry)||')'; - - -- copy the building number to the name - -- done here rather than on insert to avoid initial indexing - -- TODO: This might be a silly thing to do - --IF (NEW.name IS NULL OR array_upper(NEW.name,1) IS NULL) AND NEW.housenumber IS NOT NULL THEN - -- NEW.name := ARRAY[ROW('ref',NEW.housenumber)::keyvalue]; - --END IF; - - --Temp hack to prevent need to re-index - IF NEW.name::text = '{"(ref,'||NEW.housenumber||')"}' THEN - NEW.name := NULL; - END IF; - --IF (NEW.name IS NULL OR array_upper(NEW.name,1) IS NULL) AND NEW.type IS NOT NULL THEN - -- NEW.name := ARRAY[ROW('type',NEW.type)::keyvalue]; - --END IF; - - -- Initialise the name and address vectors using our name + -- Initialise the name vector using our name name_vector := make_keywords(NEW.name); - nameaddress_vector := name_vector; + nameaddress_vector := '{}'::int[]; -- some tag combinations add a special id for search tagpairid := get_tagpair(NEW.class,NEW.type); @@ -1487,22 +1328,8 @@ BEGIN --RAISE WARNING 'x4'; -<<<<<<< .mine IF NEW.street_place_id IS NULL THEN - FOR location IN SELECT place_id - FROM location_area - WHERE ST_Contains(area, place_centroid) and location_area.rank_search = 26 - ORDER BY ST_Distance(place_centroid, centroid) ASC limit 1 -======= - search_diameter := 0.00005; - WHILE NEW.street_place_id IS NULL AND search_diameter < 0.1 LOOP ---RAISE WARNING '% %', search_diameter,ST_AsText(ST_Centroid(NEW.geometry)); - FOR location IN SELECT place_id FROM placex - WHERE ST_DWithin(place_centroid, placex.geometry, search_diameter) and rank_search between 22 and 27 - ORDER BY ST_distance(NEW.geometry, placex.geometry) ASC limit 1 ->>>>>>> .r23726 - LOOP ---RAISE WARNING 'using nearest street,% % %',search_diameter,NEW.street,location; + FOR location IN SELECT place_id FROM getNearRoads(NEW.partition, place_centroid) LOOP NEW.street_place_id := location.place_id; END LOOP; END IF; @@ -1513,10 +1340,11 @@ BEGIN IF NEW.street_place_id IS NOT NULL THEN -- Some unnamed roads won't have been indexed, index now if needed - select count(*) from place_addressline where place_id = NEW.street_place_id INTO street_place_id_count; - IF street_place_id_count = 0 THEN - UPDATE placex set indexed = true where indexed = false and place_id = NEW.street_place_id; - END IF; +-- ALL are now indexed! +-- select count(*) from place_addressline where place_id = NEW.street_place_id INTO street_place_id_count; +-- IF street_place_id_count = 0 THEN +-- UPDATE placex set indexed = true where indexed = false and place_id = NEW.street_place_id; +-- END IF; -- Add the street to the address as zero distance to force to front of list INSERT INTO place_addressline VALUES (NEW.place_id, NEW.street_place_id, true, true, 0, 26); @@ -1552,178 +1380,72 @@ BEGIN --RAISE WARNING ' INDEXING: %',NEW; - -- Process area matches (tend to be better quality) - FOR location IN SELECT - place_id, - name, - keywords, - country_code, - rank_address, - rank_search, - ST_Distance(place_centroid, centroid) as distance - FROM location_area - WHERE ST_Contains(area, place_centroid) and location_area.rank_search < search_maxrank - ORDER BY ST_Distance(place_centroid, centroid) ASC - LOOP - ---RAISE WARNING ' AREA: % % %',location.keywords,NEW.country_code,location.country_code; - - IF NEW.country_code IS NULL THEN - NEW.country_code := location.country_code; - ELSEIF NEW.country_code != location.country_code and location.rank_search > 3 THEN - search_country_code_conflict := true; - END IF; - - -- Add it to the list of search terms - nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); - INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, true, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); - address_havelevel[location.rank_address] := true; - - END LOOP; - - -- try using the isin value to find parent places - address_maxrank := search_maxrank; + -- convert isin to array of tokenids + isin_tokens := '{}'::int[]; IF NEW.isin IS NOT NULL THEN - -- Doing isin without a country code seems to be dangerous - IF NEW.country_code is null THEN - NEW.country_code := get_country_code(place_centroid); - END IF; isin := regexp_split_to_array(NEW.isin, E'[;,]'); - FOR i IN 1..array_upper(isin, 1) LOOP - address_street_word_id := get_name_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL THEN ---RAISE WARNING ' search: %',address_street_word_id; - FOR location IN SELECT place_id,keywords,rank_search,location_point.country_code,rank_address, - ST_Distance(place_centroid, search_name.centroid) as distance - FROM search_name join location_point using (place_id) - WHERE search_name.name_vector @> ARRAY[address_street_word_id] - AND rank_search < NEW.rank_search - AND (NEW.country_code IS NULL OR search_name.country_code = NEW.country_code OR search_name.address_rank < 4) - ORDER BY ST_distance(NEW.geometry, search_name.centroid) ASC limit 1 - LOOP - - IF NEW.country_code IS NULL THEN - NEW.country_code := location.country_code; - ELSEIF NEW.country_code != location.country_code and location.rank_search > 3 THEN - search_country_code_conflict := true; - END IF; - ---RAISE WARNING ' found: %',location.place_id; - nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); - INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); - - IF address_maxrank > location.rank_address THEN - address_maxrank := location.rank_address; - END IF; - END LOOP; - END IF; - END LOOP; - FOR i IN address_maxrank..28 LOOP - address_havelevel[i] := true; - END LOOP; + IF array_upper(isin, 1) IS NOT NULL THEN + FOR i IN 1..array_upper(isin, 1) LOOP + address_street_word_id := get_name_id(make_standard_name(isin[i])); + IF address_street_word_id IS NOT NULL THEN + isin_tokens := isin_tokens + address_street_word_id; + END IF; + END LOOP; + END IF; + isin_tokens := uniq(sort(isin_tokens)); END IF; - -- If we have got a consistent country code from the areas and/or isin then we don't care about points (too inacurate) - bPointCountryCode := NEW.country_code IS NULL; + -- Process area matches + location_rank_search := 100; + location_distance := 0; + FOR location IN SELECT * from getNearFeatures(NEW.partition, place_centroid, search_maxrank, isin_tokens) LOOP - IF true THEN - -- full search using absolute position +--RAISE WARNING ' AREA: % % %',location.keywords,NEW.country_code,location.country_code; - search_diameter := 0; - -- 16 = city, anything larger tends to be an area so don't continue - WHILE FALSE AND search_diameter < 1 AND search_maxrank > 16 LOOP + IF location.rank_search < location_rank_search THEN + location_rank_search := location.rank_search; + location_distance := location.distance * 1.5; + END IF; --- RAISE WARNING 'Nearest: % %', search_diameter, search_maxrank; + IF location.distance < location_distance THEN - search_prevdiameter := search_diameter; - IF search_diameter = 0 THEN - search_diameter := 0.001; - ELSE - search_diameter := search_diameter * 2; - END IF; + -- Add it to the list of search terms + nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); + INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, true, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); + address_havelevel[location.rank_address] := true; ---RAISE WARNING '%', 'SELECT place_id, name, keywords, country_code, rank_address, rank_search,'|| --- 'ST_Distance('||place_geometry_text||', centroid) as distance,'|| --- 'ST_Distance('||place_geometry_text||', centroid) as maxdistance'|| -- this version of postgis doesnt have maxdistance ! --- ' FROM location_point_'||(case when search_maxrank > 26 THEN 26 ELSE search_maxrank end)|| --- ' WHERE ST_DWithin('||place_geometry_text||', centroid, '||search_diameter||') '|| --- ' AND ST_Distance('||place_geometry_text||', centroid) > '||search_prevdiameter|| --- ' ORDER BY ST_Distance('||place_geometry_text||', centroid) ASC'; - - -- Try nearest - FOR location IN EXECUTE 'SELECT place_id, name, keywords, country_code, rank_address, rank_search,'|| - 'ST_Distance('||place_geometry_text||', centroid) as distance,'|| - 'ST_Distance('||place_geometry_text||', centroid) as maxdistance'|| -- this version of postgis doesnt have maxdistance ! - ' FROM location_point_'||(case when search_maxrank > 26 THEN 26 ELSE search_maxrank end)|| - ' WHERE ST_DWithin('||place_geometry_text||', centroid, '||search_diameter||') '|| - ' AND ST_Distance('||place_geometry_text||', centroid) >= '||search_prevdiameter|| - ' ORDER BY ST_Distance('||place_geometry_text||', centroid) ASC' - LOOP - - IF bPointCountryCode THEN - IF NEW.country_code IS NULL THEN - NEW.country_code := location.country_code; - ELSEIF NEW.country_code != location.country_code THEN - search_country_code_conflict := true; - END IF; - END IF; - - -- Find search words ---RAISE WARNING 'IF % % % %', location.name, location.distance, location.rank_search, search_maxdistance; ---RAISE WARNING ' POINT: % % % % %', location.name, location.rank_search, location.place_id, location.distance, search_maxdistance[location.rank_search]; - IF (location.distance < search_maxdistance[location.rank_search]) THEN ---RAISE WARNING ' adding'; - -- Add it to the list of search terms, de-duplicate - nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); - - iMax := (location.maxdistance*1.5)::float; - FOR i IN location.rank_search..28 LOOP - IF iMax < search_maxdistance[i] THEN ---RAISE WARNING ' setting % to %',i,iMax; - IF iMax > search_mindistance[i] THEN - search_maxdistance[i] := iMax; - ELSE - search_maxdistance[i] := search_mindistance[i]; - END IF; - END IF; - END LOOP; + END IF; - INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); - address_havelevel[location.rank_address] := true; - - ELSE ---RAISE WARNING ' Stopped: % % % %', location.rank_search, location.distance, search_maxdistance[location.rank_search], location.name; - IF search_maxrank > location.rank_search THEN - search_maxrank := location.rank_search; - END IF; - END IF; - - END LOOP; - ---RAISE WARNING ' POINT LOCATIONS, % %', search_maxrank, search_diameter; - - END LOOP; --WHILE + END LOOP; - ELSE - -- Cascading search using nearest parent - END IF; + -- try using the isin value to find parent places + IF array_upper(isin_tokens, 1) IS NOT NULL THEN + FOR i IN 1..array_upper(isin_tokens, 1) LOOP + + FOR location IN SELECT place_id,search_name.name_vector,address_rank, + ST_Distance(place_centroid, search_name.centroid) as distance + FROM search_name + WHERE search_name.name_vector @> ARRAY[isin_tokens[i]] + AND search_rank < NEW.rank_search + AND (country_code = NEW.country_code OR address_rank < 4) + ORDER BY ST_distance(NEW.geometry, centroid) ASC limit 1 + LOOP + nameaddress_vector := array_merge(nameaddress_vector, location.name_vector); + INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.address_rank], location.distance, location.address_rank); + END LOOP; - IF search_country_code_conflict OR NEW.country_code IS NULL THEN - NEW.country_code := get_country_code(place_centroid); + END LOOP; END IF; - INSERT INTO search_name values (NEW.place_id, NEW.rank_search, NEW.rank_search, NEW.country_code, - name_vector, nameaddress_vector, place_centroid); - - IF NEW.country_code IS NOT NULL THEN - DELETE FROM place_addressline WHERE place_id = NEW.place_id and address_place_id in ( - select address_place_id from place_addressline join placex on (address_place_id = placex.place_id) - where place_addressline.place_id = NEW.place_id and placex.country_code != NEW.country_code and cached_rank_address >= 4); + -- if we have a name add this to the name search table + IF NEW.name IS NOT NULL THEN + INSERT INTO search_name values (NEW.place_id, NEW.rank_search, NEW.rank_search, NEW.country_code, + name_vector, nameaddress_vector, place_centroid); END IF; END IF; - return NEW; + RETURN NEW; END; $$ LANGUAGE plpgsql; @@ -1801,7 +1523,7 @@ DECLARE existing RECORD; existingplacex RECORD; existinggeometry GEOMETRY; - existingplace_id bigint; + existingplace_id INTEGER; result BOOLEAN; BEGIN @@ -1828,7 +1550,7 @@ BEGIN -- Patch in additional country names -- adminitrative (with typo) is unfortunately hard codes - this probably won't get fixed until v2 IF NEW.admin_level = 2 AND NEW.type = 'adminitrative' AND NEW.country_code is not null THEN - select add_keywords(NEW.name, country_name.name) from country_name where country_name.country_code = lower(NEW.country_code) INTO NEW.name; + select country_name.name || NEW.name from country_name where country_name.country_code = lower(NEW.country_code) INTO NEW.name; END IF; -- Have we already done this place? @@ -2044,7 +1766,7 @@ BEGIN END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION get_name_by_language(name keyvalue[], languagepref TEXT[]) RETURNS TEXT +CREATE OR REPLACE FUNCTION get_name_by_language(name hstore, languagepref TEXT[]) RETURNS TEXT AS $$ DECLARE search TEXT[]; @@ -2058,11 +1780,9 @@ BEGIN search := languagepref; FOR j IN 1..array_upper(search, 1) LOOP - FOR k IN 1..array_upper(name, 1) LOOP - IF (name[k].key = search[j] AND trim(name[k].value) != '') THEN - return trim(name[k].value); - END IF; - END LOOP; + IF name ? search[j] AND trim(name->search[j] != '') THEN + return trim(name->search[j]); + END IF; END LOOP; RETURN null; @@ -2092,7 +1812,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; -CREATE OR REPLACE FUNCTION get_address_postcode(for_place_id BIGINT) RETURNS TEXT +CREATE OR REPLACE FUNCTION get_address_postcode(for_place_id INTEGER) RETURNS TEXT AS $$ DECLARE result TEXT[]; @@ -2133,7 +1853,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT, languagepref TEXT[]) RETURNS TEXT +CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id INTEGER, languagepref TEXT[]) RETURNS TEXT AS $$ DECLARE result TEXT[]; @@ -2156,7 +1876,7 @@ BEGIN FOR location IN select CASE WHEN address_place_id = for_place_id AND rank_address = 0 THEN 100 ELSE rank_address END as rank_address, - CASE WHEN type = 'postcode' THEN ARRAY[ROW('name',postcode)::keyvalue] ELSE name END as name, + CASE WHEN type = 'postcode' THEN 'name'->postcode ELSE name END as name, distance,length(name::text) as namelength from place_addressline join placex on (address_place_id = placex.place_id) where place_addressline.place_id = for_place_id and ((rank_address > 0 AND rank_address < searchrankaddress) OR address_place_id = for_place_id) @@ -2196,7 +1916,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION get_addressdata_by_language(for_place_id BIGINT, languagepref TEXT[]) RETURNS TEXT[] +CREATE OR REPLACE FUNCTION get_addressdata_by_language(for_place_id INTEGER, languagepref TEXT[]) RETURNS TEXT[] AS $$ DECLARE result TEXT[]; diff --git a/sql/loaddata.sql b/sql/loaddata.sql index 45df1e33..7bef299c 100644 --- a/sql/loaddata.sql +++ b/sql/loaddata.sql @@ -6,8 +6,8 @@ TRUNCATE location_area; DROP SEQUENCE seq_place; CREATE SEQUENCE seq_place start 100000; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, geometry) select * from place where osm_type = 'N'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, geometry) select * from place where osm_type = 'W'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, geometry) select * from place where osm_type = 'R'; +insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) select * from place where osm_type = 'N'; +insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) select * from place where osm_type = 'W'; +insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) select * from place where osm_type = 'R'; --select count(*) from (select create_interpolation(osm_id, housenumber) from placex where indexed=false and class='place' and type='houses') as x; diff --git a/sql/partitions.src.sql b/sql/partitions.src.sql index 12419821..a47563f6 100644 --- a/sql/partitions.src.sql +++ b/sql/partitions.src.sql @@ -10,6 +10,9 @@ create type nearfeature as ( distance float ); +CREATE TABLE location_area_country () INHERITS (location_area_large); +CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry); + -- start CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large); CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry); @@ -24,12 +27,20 @@ CREATE INDEX idx_location_area_roadfar_-partition-_geometry ON location_area_roa create or replace function getNearRoads(in_partition TEXT, point GEOMETRY) RETURNS setof nearplace AS $$ DECLARE r nearplace%rowtype; + a BOOLEAN; BEGIN -- start IF in_partition = '-partition-' THEN - FOR r IN SELECT place_id FROM location_area_large WHERE partition = '-partition-' and ST_Contains(geometry, point) LOOP + a := FALSE; + FOR r IN SELECT place_id FROM location_area_roadnear_-partition- WHERE ST_Contains(geometry, point) ORDER BY ST_Distance(point, centroid) ASC LIMIT 1 LOOP + a := TRUE; + RETURN NEXT r; + RETURN; + END LOOP; + FOR r IN SELECT place_id FROM location_area_roadfar_-partition- WHERE ST_Contains(geometry, point) ORDER BY ST_Distance(point, centroid) ASC LOOP RETURN NEXT r; + RETURN; END LOOP; RETURN; END IF; @@ -40,22 +51,20 @@ END $$ LANGUAGE plpgsql; -create or replace function getNearFeatures(in_partition TEXT, point GEOMETRY, maxrank INTEGER) RETURNS setof nearfeature AS $$ +create or replace function getNearFeatures(in_partition TEXT, point GEOMETRY, maxrank INTEGER, isin_tokens INT[]) RETURNS setof nearfeature AS $$ DECLARE r nearfeature%rowtype; BEGIN -- start IF in_partition = '-partition-' THEN - FOR r IN SELECT - place_id, - keywords, - rank_address, - rank_search, - ST_Distance(place_centroid, centroid) as distance - FROM location_area_large - WHERE ST_Contains(area, point) and location_area_large.rank_search < maxrank - ORDER BY ST_Distance(place_centroid, centroid) ASC + FOR r IN + SELECT place_id, keywords, rank_address, rank_search, ST_Distance(point, centroid) as distance FROM ( + SELECT * FROM location_area_large_-partition- WHERE ST_Contains(geometry, point) and rank_search < maxrank + UNION ALL + SELECT * FROM location_area_country WHERE ST_Contains(geometry, point) and rank_search < maxrank + ) as location_area + ORDER BY rank_search desc, isin_tokens && keywords desc, isguess asc, rank_address asc, ST_Distance(point, centroid) ASC LOOP RETURN NEXT r; END LOOP; @@ -89,15 +98,20 @@ $$ LANGUAGE plpgsql; create or replace function insertLocationAreaLarge( - in_partition TEXT, in_place_id bigint, in_keywords INTEGER[], + in_partition TEXT, in_place_id bigint, in_country_code VARCHAR(2), in_keywords INTEGER[], in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ DECLARE BEGIN + IF in_rank_search <= 4 THEN + INSERT INTO location_area_country values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + RETURN TRUE; + END IF; + -- start IF in_partition = '-partition-' THEN - INSERT INTO location_area_large_-partition- values (in_partition, in_place_id, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + INSERT INTO location_area_large_-partition- values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); RETURN TRUE; END IF; -- end @@ -109,7 +123,7 @@ $$ LANGUAGE plpgsql; create or replace function insertLocationAreaRoadNear( - in_partition TEXT, in_place_id bigint, in_keywords INTEGER[], + in_partition TEXT, in_place_id bigint, in_country_code VARCHAR(2), in_keywords INTEGER[], in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ DECLARE @@ -117,7 +131,7 @@ BEGIN -- start IF in_partition = '-partition-' THEN - INSERT INTO location_area_roadnear_-partition- values (in_partition, in_place_id, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + INSERT INTO location_area_roadnear_-partition- values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); RETURN TRUE; END IF; -- end @@ -129,7 +143,7 @@ $$ LANGUAGE plpgsql; create or replace function insertLocationAreaRoadFar( - in_partition TEXT, in_place_id bigint, in_keywords INTEGER[], + in_partition TEXT, in_place_id bigint, in_country_code VARCHAR(2), in_keywords INTEGER[], in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ DECLARE @@ -137,7 +151,7 @@ BEGIN -- start IF in_partition = '-partition-' THEN - INSERT INTO location_area_roadfar_-partition- values (in_partition, in_place_id, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + INSERT INTO location_area_roadfar_-partition- values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); RETURN TRUE; END IF; -- end diff --git a/sql/tables.sql b/sql/tables.sql index 5945ee79..a5b83c30 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -76,7 +76,8 @@ CREATE SEQUENCE seq_word start 1; drop table IF EXISTS location_area CASCADE; CREATE TABLE location_area ( partition varchar(10), - place_id bigint, + place_id INTEGER, + country_code VARCHAR(2), keywords INTEGER[], rank_search INTEGER NOT NULL, rank_address INTEGER NOT NULL, @@ -91,7 +92,7 @@ CREATE TABLE location_area_roadfar () INHERITS (location_area); drop table IF EXISTS search_name; CREATE TABLE search_name ( - place_id bigint, + place_id INTEGER, search_rank integer, address_rank integer, country_code varchar(2), @@ -106,8 +107,8 @@ CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( - place_id bigint, - address_place_id bigint, + place_id INTEGER, + address_place_id INTEGER, fromarea boolean, isaddress boolean, distance float, @@ -118,7 +119,7 @@ CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING B drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( - place_id bigint, + place_id INTEGER, minlat float, maxlat float, minlon float, @@ -136,7 +137,7 @@ drop table IF EXISTS reverse_cache; CREATE TABLE reverse_cache ( latlonzoomid integer, country_code varchar(2), - place_id bigint + place_id INTEGER ); GRANT SELECT on reverse_cache to "www-data" ; GRANT INSERT on reverse_cache to "www-data" ; @@ -149,27 +150,28 @@ CREATE TABLE country ( country_default_language_code varchar(2) ); SELECT AddGeometryColumn('country', 'geometry', 4326, 'POLYGON', 2); -insert into country select iso3166::varchar(2), ARRAY[ROW('name:en',cntry_name)::keyvalue], null, +insert into country select iso3166::varchar(2), 'name:en'->cntry_name, null, ST_Transform(geometryn(the_geom, generate_series(1, numgeometries(the_geom))), 4326) from worldboundaries; CREATE INDEX idx_country_country_code ON country USING BTREE (country_code); CREATE INDEX idx_country_geometry ON country USING GIST (geometry); drop table placex; CREATE TABLE placex ( - place_id bigint NOT NULL, + place_id INTEGER NOT NULL, partition varchar(10), osm_type char(1), - osm_id bigint, + osm_id INTEGER, class TEXT NOT NULL, type TEXT NOT NULL, name HSTORE, - admin_level integer, + admin_level INTEGER, housenumber TEXT, street TEXT, isin TEXT, postcode TEXT, country_code varchar(2), - street_place_id bigint, + extratags HSTORE, + street_place_id INTEGER, rank_address INTEGER, rank_search INTEGER, indexed_status INTEGER, @@ -182,14 +184,17 @@ CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id); CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search); CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address); CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry); -CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); -CREATE INDEX idx_placex_pending ON placex USING BTREE (rank_search) where name IS NOT NULL and indexed = false; -CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(geometry_sector,indexed,name),rank_search) - where geometry_index(geometry_sector,indexed,name) IS NOT NULL; + +--CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); + +CREATE INDEX idx_placex_pending ON placex USING BTREE (rank_search) where indexed_status > 0; +CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0; + +--CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(geometry_sector,indexed,name),rank_search) where geometry_index(geometry_sector,indexed,name) IS NOT NULL; + CREATE INDEX idx_placex_street_place_id ON placex USING BTREE (street_place_id) where street_place_id IS NOT NULL; -CREATE INDEX idx_placex_gb_postcodesector ON placex USING BTREE (substring(upper(postcode) from '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$')) - where country_code = 'gb' and substring(upper(postcode) from '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$') is not null; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed = false and class='place' and type='houses'; +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses'; + CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id); CLUSTER placex USING idx_placex_sector; @@ -203,40 +208,12 @@ GRANT INSERT on search_name to "www-data" ; GRANT SELECT on place_addressline to "www-data" ; GRANT INSERT ON place_addressline to "www-data" ; GRANT DELETE on place_addressline to "www-data" ; -GRANT SELECT on location_point to "www-data" ; GRANT SELECT ON seq_word to "www-data" ; GRANT UPDATE ON seq_word to "www-data" ; GRANT INSERT ON word to "www-data" ; GRANT SELECT ON planet_osm_ways to "www-data" ; GRANT SELECT ON planet_osm_rels to "www-data" ; -GRANT SELECT on location_point to "www-data" ; GRANT SELECT on location_area to "www-data" ; -GRANT SELECT on location_point_26 to "www-data" ; -GRANT SELECT on location_point_25 to "www-data" ; -GRANT SELECT on location_point_24 to "www-data" ; -GRANT SELECT on location_point_23 to "www-data" ; -GRANT SELECT on location_point_22 to "www-data" ; -GRANT SELECT on location_point_21 to "www-data" ; -GRANT SELECT on location_point_20 to "www-data" ; -GRANT SELECT on location_point_19 to "www-data" ; -GRANT SELECT on location_point_18 to "www-data" ; -GRANT SELECT on location_point_17 to "www-data" ; -GRANT SELECT on location_point_16 to "www-data" ; -GRANT SELECT on location_point_15 to "www-data" ; -GRANT SELECT on location_point_14 to "www-data" ; -GRANT SELECT on location_point_13 to "www-data" ; -GRANT SELECT on location_point_12 to "www-data" ; -GRANT SELECT on location_point_11 to "www-data" ; -GRANT SELECT on location_point_10 to "www-data" ; -GRANT SELECT on location_point_9 to "www-data" ; -GRANT SELECT on location_point_8 to "www-data" ; -GRANT SELECT on location_point_7 to "www-data" ; -GRANT SELECT on location_point_6 to "www-data" ; -GRANT SELECT on location_point_5 to "www-data" ; -GRANT SELECT on location_point_4 to "www-data" ; -GRANT SELECT on location_point_3 to "www-data" ; -GRANT SELECT on location_point_2 to "www-data" ; -GRANT SELECT on location_point_1 to "www-data" ; GRANT SELECT on country to "www-data" ; -- insert creates the location tagbles, creates location indexes if indexed == true diff --git a/utils/setup.php b/utils/setup.php index 26179d04..bc8f14f7 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -11,29 +11,42 @@ array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), array('create-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'), - array('load-data', '', 0, 1, 0, 0, 'realpath', 'Import a osm file'), + array('load-data', '', 0, 1, 1, 1, 'realpath', 'Import a osm file'), array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'), ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true); + $bDidSomething = false; + if ($aCMDResult['create-db']) { + $bDidSomething = true; // TODO: path detection, detection memory, etc. - passthru('createdb nominatim'); +// passthru('createdb nominatim'); passthru('createlang plpgsql nominatim'); - passthru('psql -f /mqdata/mapquest/postgres-9.0.1-server/share/contrib/_int.sql nominatim'); - passthru('psql -f /mqdata/mapquest/postgres-9.0.1-server/share/contrib/hstore.sql nominatim'); - passthru('psql -f /mqdata/mapquest/postgres-9.0.1-server/share/contrib/postgis-1.5/postgis.sql nominatim'); - passthru('psql -f /mqdata/mapquest/postgres-9.0.1-server/share/contrib/postgis-1.5/spatial_ref_sys.sql nominatim'); + passthru('psql -f '.CONST_Path_Postgresql_Contrib.'/_int.sql nominatim'); + passthru('psql -f '.CONST_Path_Postgresql_Contrib.'/hstore.sql nominatim'); + passthru('psql -f '.CONST_Path_Postgresql_Postgis.'/postgis.sql nominatim'); + passthru('psql -f '.CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql nominatim'); + passthru('psql -f '.CONST_BasePath.'/data/country_name.sql nominatim'); + passthru('psql -f '.CONST_BasePath.'/data/country_osm_grid.sql nominatim'); + passthru('psql -f '.CONST_BasePath.'/data/gb_postcode.sql nominatim'); + passthru('psql -f '.CONST_BasePath.'/data/us_statecounty.sql nominatim'); + passthru('psql -f '.CONST_BasePath.'/data/us_state.sql nominatim'); + passthru('psql -f '.CONST_BasePath.'/data/worldboundaries.sql nominatim'); } - if ($aCMDResult['load-data']) + if (isset($aCMDResult['load-data']) && $aCMDResult['load-data']) { - passthru(CONST_BasePath.'/osm2pgsql -lsc -O gazetteer -C 10000 --hstore -d nominatim '.$aCMDResult['load-data']); + $bDidSomething = true; + passthru(CONST_BasePath.'/osm2pgsql/osm2pgsql -lsc -O gazetteer -C 10000 --hstore -d nominatim '.$aCMDResult['load-data']); + passthru('psql -f '.CONST_BasePath.'/sql/functions.sql nominatim'); + passthru('psql -f '.CONST_BasePath.'/sql/tables.sql nominatim'); } if ($aCMDResult['create-partitions']) { + $bDidSomething = true; $sSQL = 'select distinct country_code from country_name order by country_code'; $aPartitions = $oDB->getCol($sSQL); if (PEAR::isError($aPartitions)) @@ -57,4 +70,7 @@ exit; } - showUsage($aCMDOptions, true); + if (!$bDidSomething) + { + showUsage($aCMDOptions, true); + } -- 2.39.5