From a6453a808db7dccd687a3fceeedff88b0fd2e06c Mon Sep 17 00:00:00 2001 From: Brian Quinion Date: Wed, 17 Nov 2010 15:19:25 +0000 Subject: [PATCH] remove the road buffer code - made things slower. Plus some more tiger import code --- settings/settings.php | 2 +- sql/functions.sql | 64 ++++++++++++++++++++------------- sql/partitions.src.sql | 79 +--------------------------------------- utils/setup.php | 81 +++++++++++++++++++++++++++++++++++------- 4 files changed, 109 insertions(+), 117 deletions(-) diff --git a/settings/settings.php b/settings/settings.php index 51c66ee0..ca90b3c5 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -5,7 +5,7 @@ // General settings @define('CONST_Debug', false); - @define('CONST_Database_DSN', 'pgsql://brian@/nominatim'); + @define('CONST_Database_DSN', 'pgsql://@/nominatim'); // Paths @define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/9.0/contrib'); diff --git a/sql/functions.sql b/sql/functions.sql index e17ddf8d..b71ca380 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -575,8 +575,8 @@ DECLARE x BOOLEAN; BEGIN - IF rank_search > 26 THEN - RAISE EXCEPTION 'Adding location with rank > 26 (% rank %)', place_id, rank_search; + IF rank_search > 25 THEN + RAISE EXCEPTION 'Adding location with rank > 25 (% rank %)', place_id, rank_search; END IF; x := deleteLocationArea(partition, place_id); @@ -1318,10 +1318,8 @@ BEGIN IF NEW.parent_place_id IS NULL AND NEW.street IS NOT NULL THEN address_street_word_id := get_name_id(make_standard_name(NEW.street)); ---RAISE WARNING 'street: % %', NEW.street, address_street_word_id; IF address_street_word_id IS NOT NULL THEN FOR location IN SELECT * from getNearestNamedRoadFeature(NEW.partition, place_centroid, address_street_word_id) LOOP ---RAISE WARNING 'streetname found nearby %',location; NEW.parent_place_id := location.place_id; END LOOP; END IF; @@ -1330,12 +1328,6 @@ BEGIN --RAISE WARNING 'x4'; -- Still nothing, just use the nearest road --- IF NEW.parent_place_id IS NULL THEN --- FOR location IN SELECT place_id FROM getNearRoads(NEW.partition, place_centroid) LOOP --- NEW.parent_place_id := location.place_id; --- END LOOP; --- END IF; - search_diameter := 0.00005; WHILE NEW.parent_place_id IS NULL AND search_diameter < 0.1 LOOP FOR location IN SELECT place_id FROM placex @@ -1465,15 +1457,25 @@ DECLARE b BOOLEAN; BEGIN - -- mark everything linked to this place for re-indexing - UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = OLD.place_id - and placex.place_id = place_addressline.place_id and indexed_status = 0; + IF OLD.rank_address < 30 THEN + + -- mark everything linked to this place for re-indexing + UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = OLD.place_id + and placex.place_id = place_addressline.place_id and indexed_status = 0; + + DELETE FROM place_addressline where address_place_id = OLD.place_id; + + END IF; + + IF OLD.rank_address < 26 THEN + b := deleteLocationArea(OLD.partition, OLD.place_id); + END IF; + + IF OLD.name is not null THEN + b := deleteSearchName(OLD.partition, OLD.place_id); + END IF; - -- do the actual delete - b := deleteLocationArea(OLD.partition, OLD.place_id); - b := deleteSearchName(OLD.partition, OLD.place_id); DELETE FROM place_addressline where place_id = OLD.place_id; - DELETE FROM place_addressline where address_place_id = OLD.place_id; RETURN OLD; @@ -2212,37 +2214,49 @@ CREATE OR REPLACE FUNCTION tigger_create_interpolation(linegeo GEOMETRY, in_star DECLARE startnumber INTEGER; + endnumber INTEGER; stepsize INTEGER; housenum INTEGER; newpoints INTEGER; numberrange INTEGER; + rangestartnumber INTEGER; BEGIN - numberrange := in_endnumber - in_startnumber; + IF in_endnumber > in_startnumber THEN + startnumber = in_startnumber; + endnumber = in_endnumber; + ELSE + startnumber = in_endnumber; + endnumber = in_startnumber; + END IF; + + numberrange := endnumber - startnumber; + rangestartnumber := startnumber; IF (interpolationtype = 'odd' AND startnumber%2 = 0) OR (interpolationtype = 'even' AND startnumber%2 = 1) THEN - startnumber := in_startnumber + 1; + startnumber := startnumber + 1; stepsize := 2; ELSE IF (interpolationtype = 'odd' OR interpolationtype = 'even') THEN - startnumber := in_startnumber; stepsize := 2; ELSE -- everything else assumed to be 'all' - startnumber := in_startnumber; stepsize := 1; END IF; END IF; ---this is a one time operation - skip the delete ---delete from placex where osm_type = 'N' and osm_id = prevnode.osm_id and type = 'house' and place_id != prevnode.place_id; + -- Filter out really broken tiger data + IF numberrange > 0 AND (numberrange::float/stepsize::float > 500) AND ST_length(linegeo)/(numberrange::float/stepsize::float) < 0.000001 THEN + RAISE WARNING 'Road too short for number range % to % on %, % (%)',startnumber,endnumber,in_street,in_isin,ST_length(linegeo)/(numberrange::float/stepsize::float); + RETURN 0; + END IF; newpoints := 0; - FOR housenum IN startnumber..in_endnumber BY stepsize LOOP + FOR housenum IN startnumber..endnumber BY stepsize LOOP insert into placex (osm_type, osm_id, class, type, admin_level, housenumber, street, isin, postcode, country_code, parent_place_id, rank_address, rank_search, indexed_status, geometry) values ('T', nextval('seq_tigger_house'), 'place', 'house', null, housenum, in_street, in_isin, in_postcode, - 'us', null, 30, 30, 1, ST_Line_Interpolate_Point(linegeo, (housenum::float-in_startnumber::float)/numberrange::float)); + 'us', null, 30, 30, 1, ST_Line_Interpolate_Point(linegeo, (housenum::float-rangestartnumber::float)/numberrange::float)); newpoints := newpoints + 1; END LOOP; diff --git a/sql/partitions.src.sql b/sql/partitions.src.sql index 4fe06ea2..19c70b2e 100644 --- a/sql/partitions.src.sql +++ b/sql/partitions.src.sql @@ -25,14 +25,6 @@ CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large); CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id); CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry); -CREATE TABLE location_area_roadnear_-partition- () INHERITS (location_area_roadnear); -CREATE INDEX idx_location_area_roadnear_-partition-_place_id ON location_area_roadnear_-partition- USING BTREE (place_id); -CREATE INDEX idx_location_area_roadnear_-partition-_geometry ON location_area_roadnear_-partition- USING GIST (geometry); - -CREATE TABLE location_area_roadfar_-partition- () INHERITS (location_area_roadfar); -CREATE INDEX idx_location_area_roadfar_-partition-_place_id ON location_area_roadfar_-partition- USING BTREE (place_id); -CREATE INDEX idx_location_area_roadfar_-partition-_geometry ON location_area_roadfar_-partition- USING GIST (geometry); - CREATE TABLE search_name_-partition- () INHERITS (search_name_blank); CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id); CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid); @@ -41,33 +33,6 @@ CREATE INDEX idx_search_name_-partition-_nameaddress_vector ON search_name_-part -- end -create or replace function getNearRoads(in_partition INTEGER, point GEOMETRY) RETURNS setof nearplace AS $$ -DECLARE - r nearplace%rowtype; - a BOOLEAN; -BEGIN - --- start - IF in_partition = -partition- THEN - a := FALSE; - FOR r IN SELECT place_id FROM location_area_roadnear_-partition- WHERE ST_Contains(geometry, point) ORDER BY ST_Distance(point, centroid) ASC LIMIT 1 LOOP - a := TRUE; - RETURN NEXT r; - RETURN; - END LOOP; - FOR r IN SELECT place_id FROM location_area_roadfar_-partition- WHERE ST_Contains(geometry, point) ORDER BY ST_Distance(point, centroid) ASC LOOP - RETURN NEXT r; - RETURN; - END LOOP; - RETURN; - END IF; --- end - - RAISE EXCEPTION 'Unknown partition %', in_partition; -END -$$ -LANGUAGE plpgsql; - create or replace function getNearFeatures(in_partition INTEGER, point GEOMETRY, maxrank INTEGER, isin_tokens INT[]) RETURNS setof nearfeature AS $$ DECLARE r nearfeature%rowtype; @@ -100,9 +65,7 @@ BEGIN -- start IF in_partition = -partition- THEN --- DELETE from location_area_large_-partition- WHERE place_id = in_place_id; --- DELETE from location_area_roadnear_-partition- WHERE place_id = in_place_id; --- DELETE from location_area_roadfar_-partition- WHERE place_id = in_place_id; + DELETE from location_area_large_-partition- WHERE place_id = in_place_id; RETURN TRUE; END IF; -- end @@ -139,46 +102,6 @@ END $$ LANGUAGE plpgsql; -create or replace function insertLocationAreaRoadNear( - in_partition INTEGER, in_place_id integer, in_country_code VARCHAR(2), in_keywords INTEGER[], - in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, - in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ -DECLARE -BEGIN - --- start - IF in_partition = -partition- THEN - INSERT INTO location_area_roadnear_-partition- values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); - RETURN TRUE; - END IF; --- end - - RAISE EXCEPTION 'Unknown partition %', in_partition; - RETURN FALSE; -END -$$ -LANGUAGE plpgsql; - -create or replace function insertLocationAreaRoadFar( - in_partition INTEGER, in_place_id integer, in_country_code VARCHAR(2), in_keywords INTEGER[], - in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, - in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ -DECLARE -BEGIN - --- start - IF in_partition = -partition- THEN - INSERT INTO location_area_roadfar_-partition- values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); - RETURN TRUE; - END IF; --- end - - RAISE EXCEPTION 'Unknown partition %', in_partition; - RETURN FALSE; -END -$$ -LANGUAGE plpgsql; - create or replace function getNearestNamedFeature(in_partition INTEGER, point GEOMETRY, maxrank INTEGER, isin_token INTEGER) RETURNS setof nearfeature AS $$ DECLARE r nearfeature%rowtype; diff --git a/utils/setup.php b/utils/setup.php index 1ac690f7..c515fcc8 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -22,11 +22,25 @@ array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'), array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'), array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'), + array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data'), ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true); $bDidSomething = false; + // This is a pretty hard core defult - the number of processors in the box - 1 + $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1); + if ($iInstances < 1) + { + $iInstances = 1; + echo "WARNING: resetting threads to $iInstances\n"; + } + if ($iInstances > getProcessorCount()) + { + $iInstances = getProcessorCount(); + echo "WARNING: resetting threads to $iInstances\n"; + } + if ($aCMDResult['create-db'] || $aCMDResult['all']) { $bDidSomething = true; @@ -134,19 +148,6 @@ if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection)); echo '.'; - // This is a pretty hard core defult - the number of processors in the box - 1 - $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1); - if ($iInstances < 1) - { - $iInstances = 1; - echo "WARNING: reseting threads to $iInstances\n"; - } - if ($iInstances > getProcessorCount()) - { - $iInstances = getProcessorCount(); - echo "WARNING: reseting threads to $iInstances\n"; - } - $aDBInstances = array(); for($i = 0; $i < $iInstances; $i++) { @@ -171,6 +172,60 @@ echo "\n"; } + if ($aCMDResult['import-tiger-data'] || $aCMDResult['all']) + { + $bDidSomething = true; + + $aDBInstances = array(); + for($i = 0; $i < $iInstances; $i++) + { + $aDBInstances[$i] =& getDB(true); + } + + foreach(glob(CONST_BasePath.'/data/tiger2009/*.sql') as $sFile) + { + echo $sFile.': '; + $hFile = fopen($sFile, "r"); + $sSQL = fgets($hFile, 100000); + $iLines = 0; + + while(true) + { + for($i = 0; $i < $iInstances; $i++) + { + if (!pg_connection_busy($aDBInstances[$i]->connection)) + { + while(pg_get_result($aDBInstances[$i]->connection)); + $sSQL = fgets($hFile, 100000); + if (!$sSQL) break 2; + if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + $iLines++; + if ($iLines == 1000) + { + echo "."; + $iLines = 0; + } + } + } + usleep(10); + } + + fclose($hFile); + + $bAnyBusy = true; + while($bAnyBusy) + { + $bAnyBusy = false; + for($i = 0; $i < $iInstances; $i++) + { + if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true; + } + usleep(10); + } + echo "\n"; + } + } + if (!$bDidSomething) { showUsage($aCMDOptions, true); -- 2.39.5