pre-create word list to avoid duplicates due to parallel indexing

[nominatim.git] / sql / functions.sql
diff --git a/sql/functions.sql b/sql/functions.sql

index 5ce5d5ddc49efbd47b0c365c91f73f5dabbb1113..b4657bf8992ca17166efd6116d9e4a4a4ee96019 100644 (file)
--- a/sql/functions.sql
+++ b/sql/functions.sql
@@ -302,7 +302,7 @@ BEGIN
    END IF;
    r := a;
    FOR i IN 1..array_upper(b, 1) LOOP  
-    IF NOT (ARRAY[b[i]] && r) THEN
+    IF NOT (ARRAY[b[i]] <@ r) THEN
        r := r || b[i];
      END IF;
    END LOOP;
@@ -328,15 +328,18 @@ BEGIN
      s := make_standard_name(item.value);
  
      w := getorcreate_name_id(s, item.value);
-    result := result | w;
+
+    IF not(ARRAY[w] <@ result) THEN
+      result := result || w;
+    END IF;
  
      words := string_to_array(s, ' ');
      IF array_upper(words, 1) IS NOT NULL THEN
        FOR j IN 1..array_upper(words, 1) LOOP
          IF (words[j] != '') THEN
            w = getorcreate_word_id(words[j]);
-          IF NOT (ARRAY[w] && result) THEN
-            result := result | w;
+          IF NOT (ARRAY[w] <@ result) THEN
+            result := result || w;
            END IF;
          END IF;
        END LOOP;
@@ -348,8 +351,8 @@ BEGIN
          s := make_standard_name(words[j]);
          IF s != '' THEN
            w := getorcreate_word_id(s);
-          IF NOT (ARRAY[w] && result) THEN
-            result := result | w;
+          IF NOT (ARRAY[w] <@ result) THEN
+            result := result || w;
            END IF;
          END IF;
        END LOOP;
@@ -360,8 +363,8 @@ BEGIN
        s := make_standard_name(s);
        IF s != '' THEN
          w := getorcreate_name_id(s, item.value);
-        IF NOT (ARRAY[w] && result) THEN
-          result := result | w;
+        IF NOT (ARRAY[w] <@ result) THEN
+          result := result || w;
          END IF;
        END IF;
      END IF;
@@ -386,9 +389,9 @@ BEGIN
    result := '{}'::INTEGER[];
  
    s := make_standard_name(src);
-  w := getorcreate_name_id(s);
+  w := getorcreate_name_id(s, src);
  
-  IF NOT (ARRAY[w] && result) THEN
+  IF NOT (ARRAY[w] <@ result) THEN
      result := result || w;
    END IF;
  
@@ -397,13 +400,37 @@ BEGIN
      FOR j IN 1..array_upper(words, 1) LOOP
        IF (words[j] != '') THEN
          w = getorcreate_word_id(words[j]);
-        IF NOT (ARRAY[w] && result) THEN
+        IF NOT (ARRAY[w] <@ result) THEN
            result := result || w;
          END IF;
        END IF;
      END LOOP;
    END IF;
  
+  words := regexp_split_to_array(src, E'[,;()]');
+  IF array_upper(words, 1) != 1 THEN
+    FOR j IN 1..array_upper(words, 1) LOOP
+      s := make_standard_name(words[j]);
+      IF s != '' THEN
+        w := getorcreate_word_id(s);
+        IF NOT (ARRAY[w] <@ result) THEN
+          result := result || w;
+        END IF;
+      END IF;
+    END LOOP;
+  END IF;
+
+  s := regexp_replace(src, '市$', '');
+  IF s != src THEN
+    s := make_standard_name(s);
+    IF s != '' THEN
+      w := getorcreate_name_id(s, src);
+      IF NOT (ARRAY[w] <@ result) THEN
+        result := result || w;
+      END IF;
+    END IF;
+  END IF;
+
    RETURN result;
  END;
  $$
@@ -690,9 +717,11 @@ BEGIN
        and search_name.place_id = place_addressline.place_id
    LOOP
      delete from search_name where place_id = childplace.place_id;
-    childplace.nameaddress_vector := uniq(sort_asc(childplace.nameaddress_vector + to_add));
-    IF childplace.place_id = parent_place_id THEN
-      childplace.name_vector := uniq(sort_asc(childplace.name_vector + to_add));
+    IF not (ARRAY[to_add] <@ childplace.nameaddress_vector) THEN
+      childplace.nameaddress_vector := childplace.nameaddress_vector || to_add;
+    END IF;
+    IF childplace.place_id = parent_place_id and not (ARRAY[to_add] <@ childplace.name_vector) THEN
+      childplace.name_vector := childplace.name_vector || to_add;
      END IF;
      insert into search_name (place_id, search_rank, address_rank, country_code, name_vector, nameaddress_vector, centroid) 
        values (childplace.place_id, childplace.search_rank, childplace.address_rank, childplace.country_code, 
@@ -737,7 +766,7 @@ $$
  LANGUAGE plpgsql;
  
  
-CREATE OR REPLACE FUNCTION create_interpolation(wayid INTEGER, interpolationtype TEXT) RETURNS INTEGER
+CREATE OR REPLACE FUNCTION create_interpolation(wayid BIGINT, interpolationtype TEXT) RETURNS INTEGER
    AS $$
  DECLARE
    
@@ -788,7 +817,7 @@ BEGIN
            linestr := linestr||','||ST_X(nextnode.geometry)||' '||ST_Y(nextnode.geometry);
            endnumber := ('0'||substring(nextnode.housenumber,'[0-9]+'))::integer;
  
-          IF startnumber IS NOT NULL and startnumber > 0 AND endnumber IS NOT NULL and endnumber > 0 THEN
+          IF startnumber IS NOT NULL and startnumber > 0 AND endnumber IS NOT NULL and endnumber > 0 AND @(startnumber - endnumber) < 1000 THEN
  
  --RAISE WARNING 'interpolation end % % ',nextnode.place_id,endnumber;
  
@@ -870,6 +899,7 @@ DECLARE
    country_code VARCHAR(2);
    default_language VARCHAR(10);
    diameter FLOAT;
+  classtable TEXT;
  BEGIN
  --  RAISE WARNING '%',NEW.osm_id;
  
@@ -903,18 +933,23 @@ BEGIN
    NEW.place_id := nextval('seq_place');
    NEW.indexed_status := 1; --STATUS_NEW
  
-  NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+  IF NEW.rank_search >= 4 THEN
+    NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+  ELSE
+    NEW.country_code := NULL;
+  END IF;
+
    NEW.partition := get_partition(NEW.geometry, NEW.country_code);
    NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
  
    -- copy 'name' to or from the default language (if there is a default language)
-  IF NEW.name is not null AND array_upper(%#NEW.name,1) > 1 THEN
+  IF NEW.name is not null AND array_upper(akeys(NEW.name),1) > 1 THEN
      default_language := get_country_language_code(NEW.country_code);
      IF default_language IS NOT NULL THEN
        IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN
          NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name'));
        ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 'name' THEN
-        NEW.name := NEW.name || ('name' => (NEW.name -> 'name:'||default_language));
+        NEW.name := NEW.name || ('name' => (NEW.name -> ('name:'||default_language)));
        END IF;
      END IF;
    END IF;
@@ -934,7 +969,55 @@ BEGIN
      NEW.rank_address := NEW.rank_search;
  
      -- By doing in postgres we have the country available to us - currently only used for postcode
-    IF NEW.class = 'place' THEN
+    IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
+
+        IF NEW.postcode IS NULL THEN
+            -- most likely just a part of a multipolygon postcode boundary, throw it away
+            RETURN NULL;
+        END IF;
+
+        NEW.name := 'ref'=>NEW.postcode;
+
+        IF NEW.country_code = 'gb' THEN
+
+          IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
+            NEW.rank_search := 25;
+            NEW.rank_address := 5;
+          ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
+            NEW.rank_search := 23;
+            NEW.rank_address := 5;
+          ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
+            NEW.rank_search := 21;
+            NEW.rank_address := 5;
+          END IF;
+
+        ELSEIF NEW.country_code = 'de' THEN
+
+          IF NEW.postcode ~ '^([0-9]{5})$' THEN
+            NEW.rank_search := 21;
+            NEW.rank_address := 11;
+          END IF;
+
+        ELSE
+          -- Guess at the postcode format and coverage (!)
+          IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
+            NEW.rank_search := 21;
+            NEW.rank_address := 11;
+          ELSE
+            -- Does it look splittable into an area code and a local code?
+            postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
+
+            IF postcode IS NOT NULL THEN
+              NEW.rank_search := 25;
+              NEW.rank_address := 11;
+            ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
+              NEW.rank_search := 21;
+              NEW.rank_address := 11;
+            END IF;
+          END IF;
+        END IF;
+
+    ELSEIF NEW.class = 'place' THEN
        IF NEW.type in ('continent') THEN
          NEW.rank_search := 2;
          NEW.rank_address := NEW.rank_search;
@@ -948,8 +1031,8 @@ BEGIN
          NEW.rank_search := 8;
          NEW.rank_address := NEW.rank_search;
        ELSEIF NEW.type in ('region') THEN
-        NEW.rank_search := 10;
-        NEW.rank_address := NEW.rank_search;
+        NEW.rank_search := 18; -- dropped from previous value of 10
+        NEW.rank_address := 0; -- So badly miss-used that better to just drop it!
        ELSEIF NEW.type in ('county') THEN
          NEW.rank_search := 12;
          NEW.rank_address := NEW.rank_search;
@@ -980,55 +1063,16 @@ BEGIN
        ELSEIF NEW.type in ('suburb','croft','subdivision') THEN
          NEW.rank_search := 20;
          NEW.rank_address := NEW.rank_search;
-      ELSEIF NEW.type in ('farm','locality','islet') THEN
+      ELSEIF NEW.type in ('farm','locality','islet','isolated_dwelling','mountain_pass') THEN
          NEW.rank_search := 20;
          NEW.rank_address := 0;
+        -- Irish townlands, tagged as place=locality and locality=townland
+        IF (NEW.extratags -> 'locality') = 'townland' THEN
+          NEW.rank_address := 20;
+        END IF;
        ELSEIF NEW.type in ('hall_of_residence','neighbourhood','housing_estate','nature_reserve') THEN
          NEW.rank_search := 22;
          NEW.rank_address := 22;
-      ELSEIF NEW.type in ('postcode') THEN
-
-        NEW.name := 'ref'=>NEW.postcode;
-
-        IF NEW.country_code = 'gb' THEN
-
-          IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
-            NEW.rank_search := 25;
-            NEW.rank_address := 5;
-          ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
-            NEW.rank_search := 23;
-            NEW.rank_address := 5;
-          ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
-            NEW.rank_search := 21;
-            NEW.rank_address := 5;
-          END IF;
-
-        ELSEIF NEW.country_code = 'de' THEN
-
-          IF NEW.postcode ~ '^([0-9]{5})$' THEN
-            NEW.rank_search := 21;
-            NEW.rank_address := 11;
-          END IF;
-
-        ELSE
-          -- Guess at the postcode format and coverage (!)
-          IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
-            NEW.rank_search := 21;
-            NEW.rank_address := 11;
-          ELSE
-            -- Does it look splitable into and area and local code?
-            postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
-
-            IF postcode IS NOT NULL THEN
-              NEW.rank_search := 25;
-              NEW.rank_address := 11;
-            ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
-              NEW.rank_search := 21;
-              NEW.rank_address := 11;
-            END IF;
-          END IF;
-        END IF;
-
        ELSEIF NEW.type in ('airport','street') THEN
          NEW.rank_search := 26;
          NEW.rank_address := NEW.rank_search;
@@ -1109,11 +1153,12 @@ BEGIN
      IF st_area(NEW.geometry) < 1 THEN
        -- mark items within the geometry for re-indexing
  --    RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
--- work around bug in postgis
+
+      -- work around bug in postgis, this may have been fixed in 2.0.0 (see http://trac.osgeo.org/postgis/ticket/547)
        update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) 
-       AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point';
+       AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point' and (rank_search < 28 or name is not null);
        update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) 
-       AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point';
+       AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point' and (rank_search < 28 or name is not null);
      END IF;
    ELSE
      -- mark nearby items for re-indexing, where 'nearby' depends on the features rank_search and is a complete guess :(
@@ -1138,11 +1183,22 @@ BEGIN
      END IF;
      IF diameter > 0 THEN
  --      RAISE WARNING 'placex point insert: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,diameter;
-      update placex set indexed_status = 2 where indexed_status = 0 and rank_search > NEW.rank_search and ST_DWithin(placex.geometry, NEW.geometry, diameter);
+      update placex set indexed_status = 2 where indexed_status = 0 and rank_search > NEW.rank_search and ST_DWithin(placex.geometry, NEW.geometry, diameter) and (rank_search < 28 or name is not null);
      END IF;
  
    END IF;
  
+   -- add to tables for special search
+   -- Note: won't work on initial import because the classtype tables
+   -- do not yet exist. It won't hurt either.
+  classtable := 'place_classtype_' || NEW.class || '_' || NEW.type;
+  SELECT count(*)>0 FROM pg_tables WHERE tablename = classtable INTO result;
+  IF result THEN
+    EXECUTE 'INSERT INTO ' || classtable::regclass || ' (place_id, centroid) VALUES ($1,$2)' 
+    USING NEW.place_id, ST_Centroid(NEW.geometry);
+  END IF;
+
+
  --  IF NEW.rank_search < 26 THEN
  --    RAISE WARNING 'placex insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
  --  END IF;
@@ -1171,6 +1227,9 @@ DECLARE
    location RECORD;
    way RECORD;
    relation RECORD;
+  relation_members TEXT[];
+  relMember RECORD;
+  linkedplacex RECORD;
    search_diameter FLOAT;
    search_prevdiameter FLOAT;
    search_maxrank INTEGER;
@@ -1186,9 +1245,12 @@ DECLARE
  
    tagpairid INTEGER;
  
+  default_language TEXT;
    name_vector INTEGER[];
    nameaddress_vector INTEGER[];
  
+  linked_node_id BIGINT;
+
    result BOOLEAN;
  BEGIN
  
@@ -1200,9 +1262,9 @@ BEGIN
      RETURN NEW;
    END IF;
  
-  -- defered delete
+  -- deferred delete
    IF OLD.indexed_status = 100 THEN
-    delete from placex where osm_type = OLD.osm_type and osm_id = OLD.osm_id and class = OLD.class and type = OLD.type;
+    delete from placex where place_id = OLD.place_id;
      RETURN NULL;
    END IF;
  
@@ -1215,15 +1277,22 @@ BEGIN
        RETURN NEW;
      END IF;
  
-    result := deleteSearchName(NEW.partition, NEW.place_id);
-    DELETE FROM place_addressline WHERE place_id = NEW.place_id;
-    DELETE FROM place_boundingbox where place_id = NEW.place_id;
-    result := deleteRoad(NEW.partition, NEW.place_id);
-    result := deleteLocationArea(NEW.partition, NEW.place_id);
+    IF OLD.indexed_status > 1 THEN
+      result := deleteSearchName(NEW.partition, NEW.place_id);
+      DELETE FROM place_addressline WHERE place_id = NEW.place_id;
+      DELETE FROM place_boundingbox where place_id = NEW.place_id;
+      result := deleteRoad(NEW.partition, NEW.place_id);
+      result := deleteLocationArea(NEW.partition, NEW.place_id);
+      UPDATE placex set linked_place_id = null where linked_place_id = NEW.place_id;
+    END IF;
  
      -- reclaculate country and partition (should probably have a country_code and calculated_country_code as seperate fields)
-    SELECT country_code from place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO NEW.country_code;
-    NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+    IF NEW.rank_search >= 4 THEN
+      SELECT country_code from place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO NEW.country_code;
+      NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+    ELSE
+      NEW.country_code := NULL;
+    END IF;
      NEW.partition := get_partition(NEW.geometry, NEW.country_code);
      NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
  
@@ -1236,6 +1305,20 @@ BEGIN
      -- Speed up searches - just use the centroid of the feature
      -- cheaper but less acurate
      place_centroid := ST_Centroid(NEW.geometry);
+    NEW.centroid := null;
+
+    -- Thought this wasn't needed but when we add new languages to the country_name table
+    -- we need to update the existing names
+    IF NEW.name is not null AND array_upper(akeys(NEW.name),1) > 1 THEN
+      default_language := get_country_language_code(NEW.country_code);
+      IF default_language IS NOT NULL THEN
+        IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN
+          NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name'));
+        ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 'name' THEN
+          NEW.name := NEW.name || ('name' => (NEW.name -> ('name:'||default_language)));
+        END IF;
+      END IF;
+    END IF;
  
      -- Initialise the name vector using our name
      name_vector := make_keywords(NEW.name);
@@ -1251,6 +1334,12 @@ BEGIN
        address_havelevel[i] := false;
      END LOOP;
  
+    NEW.importance := null;
+    select language||':'||title,importance from get_wikipedia_match(NEW.extratags) INTO NEW.wikipedia,NEW.importance;
+    IF NEW.importance IS NULL THEN
+      select language||':'||title,importance from wikipedia_article where osm_type = NEW.osm_type and osm_id = NEW.osm_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
+    END IF;
+
  --RAISE WARNING '% %', NEW.place_id, NEW.rank_search;
  
      -- For low level elements we inherit from our parent road
@@ -1266,7 +1355,7 @@ BEGIN
        IF NEW.parent_place_id IS NULL AND NEW.osm_type = 'N' THEN
  
          -- Is this node part of a relation?
-        FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id::integer] and members @> ARRAY['n'||NEW.osm_id]
+        FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id] and members @> ARRAY['n'||NEW.osm_id]
          LOOP
            -- At the moment we only process one type of relation - associatedStreet
            IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN
@@ -1282,7 +1371,7 @@ BEGIN
  
  --RAISE WARNING 'x1';
          -- Is this node part of a way?
-        FOR way IN select id from planet_osm_ways where nodes && ARRAY[NEW.osm_id::integer] LOOP
+        FOR way IN select id from planet_osm_ways where nodes @> ARRAY[NEW.osm_id] LOOP
  --RAISE WARNING '%', way;
          FOR location IN select * from placex where osm_type = 'W' and osm_id = way.id
          LOOP
@@ -1294,19 +1383,21 @@ BEGIN
            END IF;
  
            -- Is the WAY part of a relation
-          FOR relation IN select * from planet_osm_rels where parts @> ARRAY[location.osm_id::integer] and members @> ARRAY['w'||location.osm_id]
-          LOOP
-            -- At the moment we only process one type of relation - associatedStreet
-            IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN
-              FOR i IN 1..array_upper(relation.members, 1) BY 2 LOOP
-                IF NEW.parent_place_id IS NULL AND relation.members[i+1] = 'street' THEN
---RAISE WARNING 'node in way that is in a relation %',relation;
-                  SELECT place_id from placex where osm_type='W' and osm_id = substring(relation.members[i],2,200)::integer 
-                    and rank_search = 26 INTO NEW.parent_place_id;
+          IF NEW.parent_place_id IS NULL THEN
+              FOR relation IN select * from planet_osm_rels where parts @> ARRAY[location.osm_id] and members @> ARRAY['w'||location.osm_id]
+              LOOP
+                -- At the moment we only process one type of relation - associatedStreet
+                IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN
+                  FOR i IN 1..array_upper(relation.members, 1) BY 2 LOOP
+                    IF NEW.parent_place_id IS NULL AND relation.members[i+1] = 'street' THEN
+    --RAISE WARNING 'node in way that is in a relation %',relation;
+                      SELECT place_id from placex where osm_type='W' and osm_id = substring(relation.members[i],2,200)::bigint
+                        and rank_search = 26 INTO NEW.parent_place_id; -- ::bigint: member ids may exceed the 32-bit integer range
+                    END IF;
+                  END LOOP;
                  END IF;
                END LOOP;
-            END IF;
-          END LOOP;
+          END IF;    
            
            -- If the way contains an explicit name of a street copy it
            IF NEW.street IS NULL AND location.street IS NOT NULL THEN
@@ -1338,7 +1429,7 @@ BEGIN
  
        IF NEW.parent_place_id IS NULL AND NEW.osm_type = 'W' THEN
          -- Is this way part of a relation?
-        FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id::integer] and members @> ARRAY['w'||NEW.osm_id]
+        FOR relation IN select * from planet_osm_rels where parts @> ARRAY[NEW.osm_id] and members @> ARRAY['w'||NEW.osm_id]
          LOOP
            -- At the moment we only process one type of relation - associatedStreet
            IF relation.tags @> ARRAY['associatedStreet'] AND array_upper(relation.members, 1) IS NOT NULL THEN
@@ -1416,6 +1507,114 @@ BEGIN
  
  -- RAISE WARNING '  INDEXING: %',NEW;
  
+    IF NEW.osm_type = 'R' AND NEW.rank_search < 26 THEN
+
+      -- see if we have any special relation members ('label', then 'admin_centre') that stand for this relation
+      select members from planet_osm_rels where id = NEW.osm_id INTO relation_members;
+
+      FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['label']) as member LOOP
+
+        select * from placex where osm_type = upper(substring(relMember.member,1,1))
+          and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 into linkedPlacex; -- ::bigint: ids may exceed 32 bit
+
+        -- If we don't already have one use this as the centre point of the geometry
+        IF NEW.centroid IS NULL THEN
+          NEW.centroid := coalesce(linkedPlacex.centroid,st_centroid(linkedPlacex.geometry));
+        END IF;
+
+        -- merge in the label name (NULL-safe: hstore || NULL is NULL and would wipe our own name), re-init word vector
+        NEW.name := coalesce(linkedPlacex.name || NEW.name, linkedPlacex.name, NEW.name);
+        name_vector := make_keywords(NEW.name);
+
+        -- merge in extra tags (NULL-safe: the linked place may have none, or may not have been found at all)
+        NEW.extratags := coalesce(linkedPlacex.extratags || NEW.extratags, linkedPlacex.extratags, NEW.extratags);
+
+        -- mark the linked place (excludes from search results)
+        UPDATE placex set linked_place_id = NEW.place_id where place_id = linkedPlacex.place_id;
+
+      END LOOP;
+
+      IF NEW.centroid IS NULL THEN
+
+        FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['admin_center','admin_centre']) as member LOOP
+
+          select * from placex where osm_type = upper(substring(relMember.member,1,1))
+            and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 into linkedPlacex; -- ::bigint: ids may exceed 32 bit
+
+          -- For an admin centre we also want a name match - still not perfect, for example 'new york, new york'
+          -- But that can be fixed by explicitly setting the label in the data
+          IF make_standard_name(NEW.name->'name') = make_standard_name(linkedPlacex.name->'name') 
+            AND NEW.rank_search = linkedPlacex.rank_search THEN
+
+            -- If we don't already have one use this as the centre point of the geometry
+            IF NEW.centroid IS NULL THEN
+              NEW.centroid := coalesce(linkedPlacex.centroid,st_centroid(linkedPlacex.geometry));
+            END IF;
+
+            -- merge in the name (NULL-safe), re-init word vector
+            NEW.name := coalesce(linkedPlacex.name || NEW.name, linkedPlacex.name, NEW.name);
+            name_vector := make_keywords(NEW.name);
+
+            -- merge in extra tags (NULL-safe: hstore || NULL is NULL and would wipe our own tags)
+            NEW.extratags := coalesce(linkedPlacex.extratags || NEW.extratags, linkedPlacex.extratags, NEW.extratags);
+
+            -- mark the linked place (excludes from search results)
+            UPDATE placex set linked_place_id = NEW.place_id where place_id = linkedPlacex.place_id;
+
+            -- keep a note of the node id in case we need it for wikipedia in a bit
+            linked_node_id := linkedPlacex.osm_id;
+          END IF;
+
+        END LOOP;
+
+      END IF;
+
+      -- not found one yet? how about doing a name search
+      IF NEW.centroid IS NULL AND (NEW.name->'name') is not null and make_standard_name(NEW.name->'name') != '' THEN
+
+        FOR linkedPlacex IN select placex.* from placex WHERE
+          make_standard_name(name->'name') = make_standard_name(NEW.name->'name')
+          AND placex.rank_search = NEW.rank_search
+          AND placex.place_id != NEW.place_id
+          AND placex.osm_type = 'N' AND placex.rank_search < 26
+          AND st_contains(NEW.geometry, placex.geometry)
+        LOOP
+
+          -- If we don't already have one use this as the centre point of the geometry
+          IF NEW.centroid IS NULL THEN
+            NEW.centroid := coalesce(linkedPlacex.centroid,st_centroid(linkedPlacex.geometry));
+          END IF;
+
+          -- merge in the name (NULL-safe), re-init word vector
+          NEW.name := coalesce(linkedPlacex.name || NEW.name, linkedPlacex.name, NEW.name);
+          name_vector := make_keywords(NEW.name);
+
+          -- merge in extra tags (NULL-safe: a node without extratags must not wipe ours)
+          NEW.extratags := coalesce(linkedPlacex.extratags || NEW.extratags, linkedPlacex.extratags, NEW.extratags);
+
+          -- mark the linked place (excludes from search results)
+          UPDATE placex set linked_place_id = NEW.place_id where place_id = linkedPlacex.place_id;
+
+          -- keep a note of the node id in case we need it for wikipedia in a bit
+          linked_node_id := linkedPlacex.osm_id;
+        END LOOP;
+      END IF;
+
+      IF NEW.centroid IS NOT NULL THEN
+        place_centroid := NEW.centroid;
+      END IF;
+
+      -- Did we gain a wikipedia tag in the process? then we need to recalculate our importance
+      IF NEW.importance is null THEN
+        select language||':'||title,importance from get_wikipedia_match(NEW.extratags) INTO NEW.wikipedia,NEW.importance;
+      END IF;
+      -- Still null? how about looking it up by the node id
+      IF NEW.importance IS NULL THEN
+        select language||':'||title,importance from wikipedia_article where osm_type = 'N' and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
+      END IF;
+
+    END IF;
+
      NEW.parent_place_id = 0;
      parent_place_id_rank = 0;
  
@@ -1426,24 +1625,22 @@ BEGIN
        IF array_upper(isin, 1) IS NOT NULL THEN
          FOR i IN 1..array_upper(isin, 1) LOOP
            address_street_word_id := get_name_id(make_standard_name(isin[i]));
-          IF address_street_word_id IS NOT NULL THEN
-            isin_tokens := isin_tokens + address_street_word_id;
+          IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN
+            isin_tokens := isin_tokens || address_street_word_id;
            END IF;
          END LOOP;
        END IF;
-      isin_tokens := uniq(sort(isin_tokens));
      END IF;
      IF NEW.postcode IS NOT NULL THEN
        isin := regexp_split_to_array(NEW.postcode, E'[;,]');
        IF array_upper(isin, 1) IS NOT NULL THEN
          FOR i IN 1..array_upper(isin, 1) LOOP
            address_street_word_id := get_name_id(make_standard_name(isin[i]));
-          IF address_street_word_id IS NOT NULL THEN
-            isin_tokens := isin_tokens + address_street_word_id;
+          IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN
+            isin_tokens := isin_tokens || address_street_word_id;
            END IF;
          END LOOP;
        END IF;
-      isin_tokens := uniq(sort(isin_tokens));
      END IF;
  --RAISE WARNING 'ISIN: %', isin_tokens;
  
@@ -1542,6 +1739,11 @@ BEGIN
  --      INSERT INTO search_name values (NEW.place_id, NEW.rank_search, NEW.rank_search, 0, NEW.country_code, name_vector, nameaddress_vector, place_centroid);
      END IF;
  
+    -- If we've not managed to pick up a better one - default centroid
+    IF NEW.centroid IS NULL THEN
+      NEW.centroid := place_centroid;
+    END IF;
+
    END IF;
  
    RETURN NEW;
@@ -1553,8 +1755,12 @@ CREATE OR REPLACE FUNCTION placex_delete() RETURNS TRIGGER
    AS $$
  DECLARE
    b BOOLEAN;
+  classtable TEXT;
  BEGIN
  
+  update placex set indexed_status = 2 where linked_place_id = OLD.place_id and indexed_status = 0; -- flag for re-index first: once the link is cleared this WHERE matches nothing
+  update placex set linked_place_id = null where linked_place_id = OLD.place_id;
+
    IF OLD.rank_address < 30 THEN
  
      -- mark everything linked to this place for re-indexing
@@ -1579,6 +1785,13 @@ BEGIN
  
    DELETE FROM place_addressline where place_id = OLD.place_id;
  
+  -- remove from tables for special search
+  classtable := 'place_classtype_' || OLD.class || '_' || OLD.type;
+  SELECT count(*)>0 FROM pg_tables WHERE tablename = classtable INTO b;
+  IF b THEN
+    EXECUTE 'DELETE FROM ' || classtable::regclass || ' WHERE place_id = $1' USING OLD.place_id;
+  END IF;
+
    RETURN OLD;
  
  END;
@@ -1765,12 +1978,12 @@ BEGIN
        update placex set indexed_status = 2 where indexed_status = 0 and 
            (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
            AND NOT (ST_Contains(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry))
-          AND rank_search > existingplacex.rank_search;
+          AND rank_search > existingplacex.rank_search AND (rank_search < 28 or name is not null);
  
        update placex set indexed_status = 2 where indexed_status = 0 and 
            (ST_Contains(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry))
            AND NOT (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
-          AND rank_search > existingplacex.rank_search;
+          AND rank_search > existingplacex.rank_search AND (rank_search < 28 or name is not null);
  
      END IF;
  
@@ -1790,7 +2003,8 @@ BEGIN
  
        IF st_area(NEW.geometry) < 0.5 THEN
          UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = existingplacex.place_id 
-          and placex.place_id = place_addressline.place_id and indexed_status = 0;
+          and placex.place_id = place_addressline.place_id and indexed_status = 0
+          and (rank_search < 28 or name is not null);
        END IF;
  
      END IF;
@@ -1898,7 +2112,9 @@ BEGIN
      FOR location IN 
        select nodes from planet_osm_ways where id = way_ids[j] LIMIT 1
      LOOP
-      searchnodes := searchnodes | location.nodes;
+      IF not (ARRAY[location.nodes] <@ searchnodes) THEN
+        searchnodes := searchnodes || location.nodes;
+      END IF;
      END LOOP;
    END LOOP;
  
@@ -1932,7 +2148,7 @@ BEGIN
      IF array_upper(search, 1) IS NOT NULL AND array_upper(location.name, 1) IS NOT NULL THEN
        FOR j IN 1..array_upper(search, 1) LOOP
          FOR k IN 1..array_upper(location.name, 1) LOOP
-          IF (found > location.rank_address AND location.name[k].key = search[j] AND location.name[k].value != '') AND NOT result && ARRAY[trim(location.name[k].value)] AND (for_postcode IS NULL OR location.name[k].value ilike for_postcode||'%') THEN
+          IF (found > location.rank_address AND location.name[k].key = search[j] AND location.name[k].value != '') AND NOT result @> ARRAY[trim(location.name[k].value)] AND (for_postcode IS NULL OR location.name[k].value ilike for_postcode||'%') THEN
              result[(100 - location.rank_address)] := trim(location.name[k].value);
              found := location.rank_address;
            END IF;
@@ -2364,13 +2580,6 @@ END;
  $$
  LANGUAGE plpgsql;
  
-CREATE AGGREGATE array_agg(INT[])
-(
-    sfunc = array_cat,
-    stype = INT[],
-    initcond = '{}'
-);
-
  CREATE OR REPLACE FUNCTION tigger_create_interpolation(linegeo GEOMETRY, in_startnumber INTEGER, 
    in_endnumber INTEGER, interpolationtype TEXT, 
    in_street TEXT, in_isin TEXT, in_postcode TEXT) RETURNS INTEGER
@@ -2385,8 +2594,8 @@ DECLARE
    numberrange INTEGER;
    rangestartnumber INTEGER;
    place_centroid GEOMETRY;
-  partition INTEGER;
-  parent_place_id BIGINT;
+  out_partition INTEGER;
+  out_parent_place_id BIGINT;
    location RECORD;
    address_street_word_id INTEGER;  
  
@@ -2423,32 +2632,32 @@ BEGIN
    END IF;
  
    place_centroid := ST_Centroid(linegeo);
-  partition := get_partition(place_centroid, 'us');
-  parent_place_id := null;
+  out_partition := get_partition(place_centroid, 'us');
+  out_parent_place_id := null;
  
    address_street_word_id := get_name_id(make_standard_name(in_street));
    IF address_street_word_id IS NOT NULL THEN
-    FOR location IN SELECT * from getNearestNamedRoadFeature(partition, place_centroid, address_street_word_id) LOOP
-      parent_place_id := location.place_id;
+    FOR location IN SELECT * from getNearestNamedRoadFeature(out_partition, place_centroid, address_street_word_id) LOOP
+      out_parent_place_id := location.place_id;
      END LOOP;
    END IF;
  
-  IF parent_place_id IS NULL THEN
-    FOR location IN SELECT place_id FROM getNearestParellelRoadFeature(partition, linegeo) LOOP
-      parent_place_id := location.place_id;
+  IF out_parent_place_id IS NULL THEN
+    FOR location IN SELECT place_id FROM getNearestParellelRoadFeature(out_partition, linegeo) LOOP
+      out_parent_place_id := location.place_id;
      END LOOP;    
    END IF;
  
-  IF parent_place_id IS NULL THEN
-    FOR location IN SELECT place_id FROM getNearestRoadFeature(partition, place_centroid) LOOP
-      parent_place_id := location.place_id;
+  IF out_parent_place_id IS NULL THEN
+    FOR location IN SELECT place_id FROM getNearestRoadFeature(out_partition, place_centroid) LOOP
+      out_parent_place_id := location.place_id;
      END LOOP;    
    END IF;
  
    newpoints := 0;
    FOR housenum IN startnumber..endnumber BY stepsize LOOP
      insert into location_property_tiger (place_id, partition, parent_place_id, housenumber, postcode, centroid)
-    values (nextval('seq_place'), partition, parent_place_id, housenum, in_postcode,
+    values (nextval('seq_place'), out_partition, out_parent_place_id, housenum, in_postcode,
        ST_Line_Interpolate_Point(linegeo, (housenum::float-rangestartnumber::float)/numberrange::float));
      newpoints := newpoints + 1;
    END LOOP;
@@ -2465,7 +2674,7 @@ DECLARE
  
    newpoints INTEGER;
    place_centroid GEOMETRY;
-  partition INTEGER;
+  out_partition INTEGER;
    out_parent_place_id BIGINT;
    location RECORD;
    address_street_word_id INTEGER;  
@@ -2474,18 +2683,18 @@ DECLARE
  BEGIN
  
    place_centroid := ST_Centroid(pointgeo);
-  partition := get_partition(place_centroid, in_countrycode);
+  out_partition := get_partition(place_centroid, in_countrycode);
    out_parent_place_id := null;
  
    address_street_word_id := get_name_id(make_standard_name(in_street));
    IF address_street_word_id IS NOT NULL THEN
-    FOR location IN SELECT * from getNearestNamedRoadFeature(partition, place_centroid, address_street_word_id) LOOP
+    FOR location IN SELECT * from getNearestNamedRoadFeature(out_partition, place_centroid, address_street_word_id) LOOP
        out_parent_place_id := location.place_id;
      END LOOP;
    END IF;
  
    IF out_parent_place_id IS NULL THEN
-    FOR location IN SELECT place_id FROM getNearestRoadFeature(partition, place_centroid) LOOP
+    FOR location IN SELECT place_id FROM getNearestRoadFeature(out_partition, place_centroid) LOOP
        out_parent_place_id := location.place_id;
      END LOOP;    
    END IF;
@@ -2495,15 +2704,124 @@ BEGIN
      SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode;
    END IF;
    IF out_postcode IS NULL THEN
-    out_postcode := getNearestPostcode(partition, place_centroid);
+    out_postcode := getNearestPostcode(out_partition, place_centroid);
    END IF;
  
    newpoints := 0;
    insert into location_property_aux (place_id, partition, parent_place_id, housenumber, postcode, centroid)
-    values (nextval('seq_place'), partition, out_parent_place_id, in_housenumber, out_postcode, place_centroid);
+    values (nextval('seq_place'), out_partition, out_parent_place_id, in_housenumber, out_postcode, place_centroid);
    newpoints := newpoints + 1;
  
    RETURN newpoints;
  END;
  $$
  LANGUAGE plpgsql;
+
+-- Pick the entries of a flat member list that are paired with one label.
+--
+-- `members` is walked two elements at a time: members[i] is the value that
+-- may be returned, members[i+1] is the label compared against `member`.
+--
+-- Returns the matching values in list order. The result is NULL when no
+-- pair matches, which includes a NULL or empty `members` array.
+CREATE OR REPLACE FUNCTION get_osm_rel_members(members TEXT[], member TEXT) RETURNS TEXT[]
+  AS $$
+DECLARE
+  result TEXT[];
+BEGIN
+
+  -- array_upper() is NULL for a NULL or empty array, and a NULL loop bound
+  -- aborts the function with an error; fall back to 0 so the loop is skipped.
+  FOR i IN 1..COALESCE(ARRAY_UPPER(members, 1), 0) BY 2 LOOP
+    IF members[i+1] = member THEN
+      result := result || members[i];
+    END IF;
+  END LOOP;
+
+  RETURN result;
+END;
+$$
+LANGUAGE plpgsql;
+
+-- Set-returning variant: emit every entry of a flat member list whose
+-- paired label is one of `memberLabels`.
+--
+-- `members` is walked two elements at a time: members[i] is the value that
+-- may be emitted, members[i+1] is the label tested with = ANY(memberLabels).
+--
+-- Returns one row per matching pair, in list order; no rows when nothing
+-- matches or when `members` is NULL or empty.
+CREATE OR REPLACE FUNCTION get_osm_rel_members(members TEXT[], memberLabels TEXT[]) RETURNS SETOF TEXT
+  AS $$
+BEGIN
+
+  -- array_upper() is NULL for a NULL or empty array, and a NULL loop bound
+  -- aborts the function with an error; fall back to 0 so the loop is skipped.
+  FOR i IN 1..COALESCE(ARRAY_UPPER(members, 1), 0) BY 2 LOOP
+    IF members[i+1] = ANY(memberLabels) THEN
+      RETURN NEXT members[i];
+    END IF;
+  END LOOP;
+
+  RETURN;
+END;
+$$
+LANGUAGE plpgsql;
+
+-- See: http://stackoverflow.com/questions/6410088/how-can-i-mimic-the-php-urldecode-function-in-postgresql
+-- Percent-decode a URL component.
+--
+-- The input is cut into tokens that are either a %hh escape or a single
+-- character. An escape contributes its two hex digits as they stand; any
+-- other character contributes the hex dump of its bytes. The concatenated
+-- hex string is read as a bytea value and interpreted as UTF8 text.
+-- STRICT: a NULL argument yields NULL without evaluating the body.
+CREATE OR REPLACE FUNCTION decode_url_part(p varchar) RETURNS varchar 
+  AS $$
+SELECT convert_from(
+  (E'\\x' || array_to_string(ARRAY(
+    SELECT
+      CASE length(tok.piece[1])
+        WHEN 1 THEN encode(convert_to(tok.piece[1], 'SQL_ASCII'), 'hex')
+        ELSE substring(tok.piece[1] from 2 for 2)
+      END
+    FROM regexp_matches($1, '%[0-9a-f][0-9a-f]|.', 'gi') AS tok(piece)
+  ), ''))::bytea,
+  'UTF8');
+$$ 
+LANGUAGE SQL IMMUTABLE STRICT;
+
+-- Non-failing wrapper around decode_url_part(): an error raised while
+-- decoding is trapped by the OTHERS handler and NULL is returned instead.
+CREATE OR REPLACE FUNCTION catch_decode_url_part(p varchar) RETURNS varchar
+  AS $$
+BEGIN
+  RETURN decode_url_part(p);
+EXCEPTION
+  WHEN OTHERS THEN
+    RETURN NULL;
+END;
+$$
+LANGUAGE plpgsql IMMUTABLE;
+
+-- Row type returned by get_wikipedia_match().
+-- IF EXISTS keeps the script runnable on a database where the type has not
+-- been created yet. CASCADE also drops objects that depend on the type, so
+-- functions returning it must be (re)created after this statement.
+DROP TYPE IF EXISTS wikipedia_article_match CASCADE;
+create type wikipedia_article_match as (
+  language TEXT,
+  title TEXT,
+  importance FLOAT
+);
+
+-- Find the wikipedia article referenced by a set of extra tags.
+--
+-- The tag `wikipedia` is tried first, then `wikipedia:<lang>` for each entry
+-- of `langs` in order. A tag value may be a wikipedia.org/wiki/ URL, a
+-- `<lang>:<title>` pair or a bare title; a bare title takes its language
+-- from the tag key ('en' for the plain `wikipedia` tag). Spaces in the value
+-- are replaced by underscores before the lookup.
+--
+-- For each tag, the title is looked up in wikipedia_article directly and via
+-- wikipedia_redirect, as given, URL-decoded, and URL-decoded with
+-- backslashes removed. The candidate with the highest importance is
+-- returned for the first tag that produces a match; NULL if none does.
+CREATE OR REPLACE FUNCTION get_wikipedia_match(extratags HSTORE) RETURNS wikipedia_article_match
+  AS $$
+DECLARE
+  langs TEXT[];
+  i INT;
+  wiki_article TEXT;
+  wiki_article_title TEXT;
+  wiki_article_language TEXT;
+  colon_pos INT;
+  result wikipedia_article_match;
+BEGIN
+  langs := ARRAY['','ar','bg','ca','cs','da','de','en','es','eo','eu','fa','fr','ko','hi','hr','id','it','he','lt','hu','ms','nl','ja','no','pl','pt','kk','ro','ru','sk','sl','sr','fi','sv','tr','uk','vi','vo','war','zh'];
+  i := 1;
+  -- Indexing past the end of langs yields NULL, which ends the loop.
+  WHILE langs[i] IS NOT NULL LOOP
+    wiki_article := extratags->(case when langs[i] = '' THEN 'wikipedia' ELSE 'wikipedia:'||langs[i] END);
+    IF wiki_article is not null THEN
+      -- Reduce a full article URL to the '<lang>:<title>' form.
+      wiki_article := replace(regexp_replace(wiki_article,E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_');
+
+      -- Split at the FIRST colon only, so that a title which itself contains
+      -- a colon is kept whole instead of being cut at its second colon.
+      colon_pos := strpos(wiki_article, ':');
+      IF colon_pos > 0 THEN
+        wiki_article_title := substr(wiki_article, colon_pos + 1);
+      ELSE
+        wiki_article_title := '';
+      END IF;
+
+      IF wiki_article_title = '' THEN
+        -- No language prefix: the whole value is the title.
+        wiki_article_title := wiki_article;
+        wiki_article_language := CASE WHEN langs[i] = '' THEN 'en' ELSE langs[i] END;
+      ELSE
+        wiki_article_language := lower(split_part(wiki_article, ':', 1));
+      END IF;
+
+      -- ORDER BY / LIMIT apply to the combined result of both branches.
+      select wikipedia_article.language,wikipedia_article.title,wikipedia_article.importance
+        from wikipedia_article 
+        where language = wiki_article_language and 
+        (title = wiki_article_title OR title = catch_decode_url_part(wiki_article_title) OR title = replace(catch_decode_url_part(wiki_article_title),E'\\',''))
+      UNION ALL
+      select wikipedia_article.language,wikipedia_article.title,wikipedia_article.importance
+        from wikipedia_redirect join wikipedia_article on (wikipedia_redirect.language = wikipedia_article.language and wikipedia_redirect.to_title = wikipedia_article.title)
+        where wikipedia_redirect.language = wiki_article_language and 
+        (from_title = wiki_article_title OR from_title = catch_decode_url_part(wiki_article_title) OR from_title = replace(catch_decode_url_part(wiki_article_title),E'\\',''))
+      order by importance desc limit 1 INTO result;
+
+      IF result.language is not null THEN
+        return result;
+      END IF;
+    END IF;
+    i := i + 1;
+  END LOOP;
+  RETURN NULL;
+END;
+$$
+LANGUAGE plpgsql;