Merge remote-tracking branch 'upstream/master'

author Sarah Hoffmann <lonvia@denofr.de>

Mon, 31 Mar 2025 15:14:19 +0000 (17:14 +0200)

committer Sarah Hoffmann <lonvia@denofr.de>

Mon, 31 Mar 2025 15:14:19 +0000 (17:14 +0200)
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 31 Mar 2025 15:14:19 +0000 (17:14 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 31 Mar 2025 15:14:19 +0000 (17:14 +0200)
diff --git a/lib-sql/functions.sql b/lib-sql/functions.sql

index 158969d95e124878884d05c72a3b491fb3a7606c..737a3f2193a159f961dcd260f5232f4641e58fcb 100644 (file)
--- a/lib-sql/functions.sql
+++ b/lib-sql/functions.sql
@@ -8,7 +8,6 @@
  {% include('functions/utils.sql') %}
  {% include('functions/ranking.sql') %}
  {% include('functions/importance.sql') %}
-{% include('functions/address_lookup.sql') %}
  {% include('functions/interpolation.sql') %}
  
  {% if 'place' in db.tables %}
diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql

deleted file mode 100644 (file)

index b59b765..0000000
--- a/lib-sql/functions/address_lookup.sql
+++ /dev/null
@@ -1,334 +0,0 @@
--- SPDX-License-Identifier: GPL-2.0-only
---
--- This file is part of Nominatim. (https://nominatim.org)
---
--- Copyright (C) 2022 by the Nominatim developer community.
--- For a full list of authors see the git log.
-
--- Functions for returning address information for a place.
-
-DROP TYPE IF EXISTS addressline CASCADE;
-CREATE TYPE addressline as (
-  place_id BIGINT,
-  osm_type CHAR(1),
-  osm_id BIGINT,
-  name HSTORE,
-  class TEXT,
-  type TEXT,
-  place_type TEXT,
-  admin_level INTEGER,
-  fromarea BOOLEAN,
-  isaddress BOOLEAN,
-  rank_address INTEGER,
-  distance FLOAT
-);
-
-
-CREATE OR REPLACE FUNCTION get_name_by_language(name hstore, languagepref TEXT[])
-  RETURNS TEXT
-  AS $$
-DECLARE
-  result TEXT;
-BEGIN
-  IF name is null THEN
-    RETURN null;
-  END IF;
-
-  FOR j IN 1..array_upper(languagepref,1) LOOP
-    IF name ? languagepref[j] THEN
-      result := trim(name->languagepref[j]);
-      IF result != '' THEN
-        return result;
-      END IF;
-    END IF;
-  END LOOP;
-
-  -- as a fallback - take the last element since it is the default name
-  RETURN trim((avals(name))[array_length(avals(name), 1)]);
-END;
-$$
-LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
-
-
---housenumber only needed for tiger data
-CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT,
-                                                   housenumber INTEGER,
-                                                   languagepref TEXT[])
-  RETURNS TEXT
-  AS $$
-DECLARE
-  result TEXT[];
-  currresult TEXT;
-  prevresult TEXT;
-  location RECORD;
-BEGIN
-
-  result := '{}';
-  prevresult := '';
-
-  FOR location IN
-    SELECT name,
-       CASE WHEN place_id = for_place_id THEN 99 ELSE rank_address END as rank_address
-    FROM get_addressdata(for_place_id, housenumber)
-    WHERE isaddress order by rank_address desc
-  LOOP
-    currresult := trim(get_name_by_language(location.name, languagepref));
-    IF currresult != prevresult AND currresult IS NOT NULL
-       AND result[(100 - location.rank_address)] IS NULL
-    THEN
-      result[(100 - location.rank_address)] := currresult;
-      prevresult := currresult;
-    END IF;
-  END LOOP;
-
-  RETURN array_to_string(result,', ');
-END;
-$$
-LANGUAGE plpgsql STABLE PARALLEL SAFE;
-
-DROP TYPE IF EXISTS addressdata_place;
-CREATE TYPE addressdata_place AS (
-  place_id BIGINT,
-  country_code VARCHAR(2),
-  housenumber TEXT,
-  postcode TEXT,
-  class TEXT,
-  type TEXT,
-  name HSTORE,
-  address HSTORE,
-  centroid GEOMETRY
-);
-
--- Compute the list of address parts for the given place.
---
--- If in_housenumber is greator or equal 0, look for an interpolation.
-CREATE OR REPLACE FUNCTION get_addressdata(in_place_id BIGINT, in_housenumber INTEGER)
-  RETURNS setof addressline
-  AS $$
-DECLARE
-  place addressdata_place;
-  location RECORD;
-  country RECORD;
-  current_rank_address INTEGER;
-  location_isaddress BOOLEAN;
-BEGIN
-  -- The place in question might not have a direct entry in place_addressline.
-  -- Look for the parent of such places then and save it in place.
-
-  -- first query osmline (interpolation lines)
-  IF in_housenumber >= 0 THEN
-    SELECT parent_place_id as place_id, country_code,
-           in_housenumber as housenumber, postcode,
-           'place' as class, 'house' as type,
-           null as name, null as address,
-           ST_Centroid(linegeo) as centroid
-      INTO place
-      FROM location_property_osmline
-      WHERE place_id = in_place_id
-            AND in_housenumber between startnumber and endnumber;
-  END IF;
-
-  --then query tiger data
-  {% if config.get_bool('USE_US_TIGER_DATA') %}
-  IF place IS NULL AND in_housenumber >= 0 THEN
-    SELECT parent_place_id as place_id, 'us' as country_code,
-           in_housenumber as housenumber, postcode,
-           'place' as class, 'house' as type,
-           null as name, null as address,
-           ST_Centroid(linegeo) as centroid
-      INTO place
-      FROM location_property_tiger
-      WHERE place_id = in_place_id
-            AND in_housenumber between startnumber and endnumber;
-  END IF;
-  {% endif %}
-
-  -- postcode table
-  IF place IS NULL THEN
-    SELECT parent_place_id as place_id, country_code,
-           null::text as housenumber, postcode,
-           'place' as class, 'postcode' as type,
-           null as name, null as address,
-           null as centroid
-      INTO place
-      FROM location_postcode
-      WHERE place_id = in_place_id;
-  END IF;
-
-  -- POI objects in the placex table
-  IF place IS NULL THEN
-    SELECT parent_place_id as place_id, country_code,
-           coalesce(address->'housenumber',
-                    address->'streetnumber',
-                    address->'conscriptionnumber')::text as housenumber,
-           postcode,
-           class, type,
-           name, address,
-           centroid
-      INTO place
-      FROM placex
-      WHERE place_id = in_place_id and rank_search > 27;
-  END IF;
-
-  -- If place is still NULL at this point then the object has its own
-  -- entry in place_address line. However, still check if there is not linked
-  -- place we should be using instead.
-  IF place IS NULL THEN
-    select coalesce(linked_place_id, place_id) as place_id,  country_code,
-           null::text as housenumber, postcode,
-           class, type,
-           null as name, address,
-           null as centroid
-      INTO place
-      FROM placex where place_id = in_place_id;
-  END IF;
-
---RAISE WARNING '% % % %',searchcountrycode, searchhousenumber, searchpostcode;
-
-  -- --- Return the record for the base entry.
-
-  current_rank_address := 1000;
-  FOR location IN
-    SELECT placex.place_id, osm_type, osm_id, name,
-           coalesce(extratags->'linked_place', extratags->'place') as place_type,
-           class, type, admin_level,
-           CASE WHEN rank_address = 0 THEN 100
-                WHEN rank_address = 11 THEN 5
-                ELSE rank_address END as rank_address,
-           country_code
-      FROM placex
-      WHERE place_id = place.place_id
-  LOOP
---RAISE WARNING '%',location;
-    -- mix in default names for countries
-    IF location.rank_address = 4 and place.country_code is not NULL THEN
-      FOR country IN
-        SELECT coalesce(name, ''::hstore) as name FROM country_name
-          WHERE country_code = place.country_code LIMIT 1
-      LOOP
-        place.name := country.name || place.name;
-      END LOOP;
-    END IF;
-
-    IF location.rank_address < 4 THEN
-      -- no country locations for ranks higher than country
-      place.country_code := NULL::varchar(2);
-    ELSEIF place.country_code IS NULL AND location.country_code IS NOT NULL THEN
-      place.country_code := location.country_code;
-    END IF;
-
-    RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id,
-                    location.name, location.class, location.type,
-                    location.place_type,
-                    location.admin_level, true,
-                    location.type not in ('postcode', 'postal_code'),
-                    location.rank_address, 0)::addressline;
-
-    current_rank_address := location.rank_address;
-  END LOOP;
-
-  -- --- Return records for address parts.
-
-  FOR location IN
-    SELECT placex.place_id, osm_type, osm_id, name, class, type,
-           coalesce(extratags->'linked_place', extratags->'place') as place_type,
-           admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress,
-           CASE WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address,
-           distance, country_code, postcode
-      FROM place_addressline join placex on (address_place_id = placex.place_id)
-      WHERE place_addressline.place_id IN (place.place_id, in_place_id)
-            AND linked_place_id is null
-            AND (placex.country_code IS NULL OR place.country_code IS NULL
-                 OR placex.country_code = place.country_code)
-      ORDER BY rank_address desc,
-               (place_addressline.place_id = in_place_id) desc,
-               (CASE WHEN coalesce((avals(name) && avals(place.address)), False) THEN 2
-                     WHEN isaddress THEN 0
-                     WHEN fromarea
-                          and place.centroid is not null
-                          and ST_Contains(geometry, place.centroid) THEN 1
-                     ELSE -1 END) desc,
-               fromarea desc, distance asc, rank_search desc
-  LOOP
-    -- RAISE WARNING '%',location;
-    location_isaddress := location.rank_address != current_rank_address;
-
-    IF place.country_code IS NULL AND location.country_code IS NOT NULL THEN
-      place.country_code := location.country_code;
-    END IF;
-    IF location.type in ('postcode', 'postal_code')
-       AND place.postcode is not null
-    THEN
-      -- If the place had a postcode assigned, take this one only
-      -- into consideration when it is an area and the place does not have
-      -- a postcode itself.
-      IF location.fromarea AND location_isaddress
-         AND (place.address is null or not place.address ? 'postcode')
-      THEN
-        place.postcode := null; -- remove the less exact postcode
-      ELSE
-        location_isaddress := false;
-      END IF;
-    END IF;
-    RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id,
-                    location.name, location.class, location.type,
-                    location.place_type,
-                    location.admin_level, location.fromarea,
-                    location_isaddress,
-                    location.rank_address,
-                    location.distance)::addressline;
-
-    current_rank_address := location.rank_address;
-  END LOOP;
-
-  -- If no country was included yet, add the name information from country_name.
-  IF current_rank_address > 4 THEN
-    FOR location IN
-      SELECT name || coalesce(derived_name, ''::hstore) as name FROM country_name
-        WHERE country_code = place.country_code LIMIT 1
-    LOOP
---RAISE WARNING '% % %',current_rank_address,searchcountrycode,countryname;
-      RETURN NEXT ROW(null, null, null, location.name, 'place', 'country', NULL,
-                      null, true, true, 4, 0)::addressline;
-    END LOOP;
-  END IF;
-
-  -- Finally add some artificial rows.
-  IF place.country_code IS NOT NULL THEN
-    location := ROW(null, null, null, hstore('ref', place.country_code),
-                    'place', 'country_code', null, null, true, false, 4, 0)::addressline;
-    RETURN NEXT location;
-  END IF;
-
-  IF place.name IS NOT NULL THEN
-    location := ROW(in_place_id, null, null, place.name, place.class,
-                    place.type, null, null, true, true, 29, 0)::addressline;
-    RETURN NEXT location;
-  END IF;
-
-  IF place.housenumber IS NOT NULL THEN
-    location := ROW(null, null, null, hstore('ref', place.housenumber),
-                    'place', 'house_number', null, null, true, true, 28, 0)::addressline;
-    RETURN NEXT location;
-  END IF;
-
-  IF place.address is not null and place.address ? '_unlisted_place' THEN
-    RETURN NEXT ROW(null, null, null, hstore('name', place.address->'_unlisted_place'),
-                    'place', 'locality', null, null, true, true, 25, 0)::addressline;
-  END IF;
-
-  IF place.postcode is not null THEN
-    location := ROW(null, null, null, hstore('ref', place.postcode), 'place',
-                    'postcode', null, null, false, true, 5, 0)::addressline;
-    RETURN NEXT location;
-  ELSEIF place.address is not null and place.address ? 'postcode'
-         and not place.address->'postcode' SIMILAR TO '%(,|;)%' THEN
-    location := ROW(null, null, null, hstore('ref', place.address->'postcode'), 'place',
-                    'postcode', null, null, false, true, 5, 0)::addressline;
-    RETURN NEXT location;
-  END IF;
-
-  RETURN;
-END;
-$$
-LANGUAGE plpgsql STABLE PARALLEL SAFE;
diff --git a/lib-sql/tokenizer/icu_tokenizer.sql b/lib-sql/tokenizer/icu_tokenizer.sql

index f0c30f1b786471d210cd96b28fd49fd149487ea2..8cf13120a11515db4fbe432c131332fdea054a45 100644 (file)
--- a/lib-sql/tokenizer/icu_tokenizer.sql
+++ b/lib-sql/tokenizer/icu_tokenizer.sql
@@ -128,16 +128,14 @@ DECLARE
    partial_terms TEXT[] = '{}'::TEXT[];
    term TEXT;
    term_id INTEGER;
-  term_count INTEGER;
  BEGIN
    SELECT min(word_id) INTO full_token
      FROM word WHERE word = norm_term and type = 'W';
  
    IF full_token IS NULL THEN
      full_token := nextval('seq_word');
-    INSERT INTO word (word_id, word_token, type, word, info)
-      SELECT full_token, lookup_term, 'W', norm_term,
-             json_build_object('count', 0)
+    INSERT INTO word (word_id, word_token, type, word)
+      SELECT full_token, lookup_term, 'W', norm_term
          FROM unnest(lookup_terms) as lookup_term;
    END IF;
  
@@ -150,14 +148,67 @@ BEGIN
  
    partial_tokens := '{}'::INT[];
    FOR term IN SELECT unnest(partial_terms) LOOP
-    SELECT min(word_id), max(info->>'count') INTO term_id, term_count
+    SELECT min(word_id) INTO term_id
+      FROM word WHERE word_token = term and type = 'w';
+
+    IF term_id IS NULL THEN
+      term_id := nextval('seq_word');
+      INSERT INTO word (word_id, word_token, type)
+        VALUES (term_id, term, 'w');
+    END IF;
+
+    partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
+  END LOOP;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION getorcreate_full_word(norm_term TEXT,
+                                                 lookup_terms TEXT[],
+                                                 lookup_norm_terms TEXT[],
+                                                 OUT full_token INT,
+                                                 OUT partial_tokens INT[])
+  AS $$
+DECLARE
+  partial_terms TEXT[] = '{}'::TEXT[];
+  term TEXT;
+  term_id INTEGER;
+BEGIN
+  SELECT min(word_id) INTO full_token
+    FROM word WHERE word = norm_term and type = 'W';
+
+  IF full_token IS NULL THEN
+    full_token := nextval('seq_word');
+    IF lookup_norm_terms IS NULL THEN
+      INSERT INTO word (word_id, word_token, type, word)
+        SELECT full_token, lookup_term, 'W', norm_term
+          FROM unnest(lookup_terms) as lookup_term;
+    ELSE
+      INSERT INTO word (word_id, word_token, type, word, info)
+        SELECT full_token, t.lookup, 'W', norm_term,
+               CASE WHEN norm_term = t.norm THEN null
+               ELSE json_build_object('lookup', t.norm) END
+          FROM unnest(lookup_terms, lookup_norm_terms) as t(lookup, norm);
+    END IF;
+  END IF;
+
+  FOR term IN SELECT unnest(string_to_array(unnest(lookup_terms), ' ')) LOOP
+    term := trim(term);
+    IF NOT (ARRAY[term] <@ partial_terms) THEN
+      partial_terms := partial_terms || term;
+    END IF;
+  END LOOP;
+
+  partial_tokens := '{}'::INT[];
+  FOR term IN SELECT unnest(partial_terms) LOOP
+    SELECT min(word_id) INTO term_id
        FROM word WHERE word_token = term and type = 'w';
  
      IF term_id IS NULL THEN
        term_id := nextval('seq_word');
-      term_count := 0;
-      INSERT INTO word (word_id, word_token, type, info)
-        VALUES (term_id, term, 'w', json_build_object('count', term_count));
+      INSERT INTO word (word_id, word_token, type)
+        VALUES (term_id, term, 'w');
      END IF;
  
      partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
diff --git a/settings/icu-rules/variants-en.yaml b/settings/icu-rules/variants-en.yaml

index 99cd6da66954e867004e076d33f13164db092185..54a7b475c68547c31540179065adec0ca5bec158 100644 (file)
--- a/settings/icu-rules/variants-en.yaml
+++ b/settings/icu-rules/variants-en.yaml
@@ -6,25 +6,19 @@
      -  Air Force Base -> AFB
      -  Air National Guard Base -> ANGB
      -  Airport -> Aprt
-    -  Alley -> Al
-    -  Alley -> All
-    -  Alley -> Ally
-    -  Alley -> Aly
+    -  Alley -> Al,All,Ally,Aly
      -  Alleyway -> Alwy
      -  Amble -> Ambl
      -  Anex -> Anx
      -  Apartments -> Apts
-    -  Approach -> Apch
-    -  Approach -> App
+    -  Approach -> Apch,App
      -  Arcade -> Arc
      -  Arterial -> Artl
      -  Artery -> Arty
-    -  Avenue -> Av
-    -  Avenue -> Ave
+    -  Avenue -> Av,Ave
      -  Back -> Bk
      -  Banan -> Ba
-    -  Basin -> Basn
-    -  Basin -> Bsn
+    -  Basin -> Basn,Bsn
      -  Bayou -> Byu
      -  Beach -> Bch
      -  Bend -> Bnd
@@ -33,71 +27,51 @@
      -  Bluffs -> Blfs
      -  Boardwalk -> Bwlk
      -  Bottom -> Btm
-    -  Boulevard -> Blvd
-    -  Boulevard -> Bvd
+    -  Boulevard -> Blvd,Bvd
      -  Boundary -> Bdy
      -  Bowl -> Bl
      -  Brace -> Br
      -  Brae -> Br
      -  Branch -> Br
      -  Break -> Brk
-    -  Bridge -> Bdge
-    -  Bridge -> Br
-    -  Bridge -> Brdg
-    -  Bridge -> Brg
-    -  Bridge -> Bri
-    -  Broadway -> Bdwy
-    -  Broadway -> Bway
-    -  Broadway -> Bwy
+    -  Bridge$ -> Bdge,Br,Brdg,Brg,Bri
+    -  Broadway -> Bdwy,Bway,Bwy
      -  Brook -> Brk
      -  Brooks -> Brks
      -  Brow -> Brw
-    -  Buildings -> Bldgs
-    -  Buildings -> Bldngs
+    -  Buildings -> Bldgs,Bldngs
      -  Business -> Bus
      -  Burg -> Bg
      -  Burgs -> Bgs
-    -  Bypass -> Bps
-    -  Bypass -> Byp
-    -  Bypass -> Bypa
+    -  Bypass -> Bps,Byp,Bypa
      -  Byway -> Bywy
      -  Camp -> Cp
      -  Canyon -> Cyn
      -  Cape -> Cpe
      -  Caravan -> Cvn
-    -  Causeway -> Caus
-    -  Causeway -> Cswy
-    -  Causeway -> Cway
-    -  Center -> Cen
-    -  Center -> Ctr
+    -  Causeway -> Caus,Cswy,Cway
+    -  Center,Centre -> Cen,Ctr
      -  Centers -> Ctrs
      -  Central -> Ctrl
-    -  Centre -> Cen
-    -  Centre -> Ctr
      -  Centreway -> Cnwy
      -  Chase -> Ch
      -  Church -> Ch
      -  Circle -> Cir
      -  Circles -> Cirs
-    -  Circuit -> Cct
-    -  Circuit -> Ci
-    -  Circus -> Crc
-    -  Circus -> Crcs
+    -  Circuit -> Cct,Ci
+    -  Circus -> Crc,Crcs
      -  City -> Cty
      -  Cliff -> Clf
      -  Cliffs -> Clfs
      -  Close -> Cl
      -  Club -> Clb
-    -  Common -> Cmn
-    -  Common -> Comm
+    -  Common -> Cmn,Comm
      -  Commons -> Cmns
      -  Community -> Comm
      -  Concourse -> Cnc
      -  Concourse -> Con
      -  Copse -> Cps
-    -  Corner -> Cor
-    -  Corner -> Cnr
-    -  Corner -> Crn
+    -  Corner -> Cor,Cnr,Crn
      -  Corners -> Cors
      -  Corso -> Cso
      -  Cottages -> Cotts
@@ -105,36 +79,24 @@
      -  County Road -> CR
      -  County Route -> CR
      -  Course -> Crse
-    -  Court -> Crt
-    -  Court -> Ct
+    -  Court -> Crt,Ct
      -  Courts -> Cts
      -  Courtyard -> Cyd
      -  Courtyard -> Ctyd
-    -  Cove -> Ce
-    -  Cove -> Cov
-    -  Cove -> Cv
+    -  Cove$ -> Ce,Cov,Cv
      -  Coves -> Cvs
-    -  Creek -> Ck
-    -  Creek -> Cr
-    -  Creek -> Crk
+    -  Creek$ -> Ck,Cr,Crk
      -  Crescent -> Cr
      -  Crescent -> Cres
-    -  Crest -> Crst
-    -  Crest -> Cst
+    -  Crest -> Crst,Cst
      -  Croft -> Cft
-    -  Cross -> Cs
-    -  Cross -> Crss
-    -  Crossing -> Crsg
-    -  Crossing -> Csg
-    -  Crossing -> Xing
-    -  Crossroad -> Crd
-    -  Crossroad -> Xrd
+    -  Cross -> Cs,Crss
+    -  Crossing -> Crsg,Csg,Xing
+    -  Crossroad -> Crd,Xrd
      -  Crossroads -> Xrds
      -  Crossway -> Cowy
-    -  Cul-de-sac -> Cds
-    -  Cul-de-sac -> Csac
-    -  Curve -> Cve
-    -  Curve -> Curv
+    -  Cul-de-sac -> Cds,Csac
+    -  Curve -> Cve,Curv
      -  Cutting -> Cutt
      -  Dale -> Dle
      -  Dam -> Dm
@@ -143,14 +105,10 @@
      -  Divide -> Dv
      -  Down -> Dn
      -  Downs -> Dn
-    -  Drive -> Dr
-    -  Drive -> Drv
-    -  Drive -> Dv
+    -  Drive -> Dr,Drv,Dv
      -  Drives -> Drs
      -  Drive-In => Drive-In # prevent abbreviation here
-    -  Driveway -> Drwy
-    -  Driveway -> Dvwy
-    -  Driveway -> Dwy
+    -  Driveway -> Drwy,Dvwy,Dwy
      -  East -> E
      -  Edge -> Edg
      -  Elbow -> Elb
@@ -158,25 +116,18 @@
      -  Esplanade -> Esp
      -  Estate -> Est
      -  Estates -> Ests
-    -  Expressway -> Exp
-    -  Expressway -> Expy
-    -  Expressway -> Expwy
-    -  Expressway -> Xway
+    -  Expressway -> Exp,Expy,Expwy,Xway
      -  Extension -> Ex
      -  Extensions -> Exts
-    -  Fairway -> Fawy
-    -  Fairway -> Fy
+    -  Fairway -> Fawy,Fy
      -  Falls -> Fls
      -  Father -> Fr
-    -  Ferry -> Fy
-    -  Ferry -> Fry
-    -  Field -> Fd
-    -  Field -> Fld
+    -  Ferry -> Fy,Fry
+    -  Field -> Fd,Fld
      -  Fields -> Flds
      -  Fire Track -> Ftrk
      -  Firetrail -> Fit
-    -  Flat -> Fl
-    -  Flat -> Flt
+    -  Flat -> Fl,Flt
      -  Flats -> Flts
      -  Follow -> Folw
      -  Footway -> Ftwy
@@ -191,67 +142,47 @@
      -  Fork -> Frk
      -  Forks -> Frks
      -  Fort -> Ft
-    -  Freeway -> Frwy
-    -  Freeway -> Fwy
+    -  Freeway -> Frwy,Fwy
      -  Front -> Frnt
-    -  Frontage -> Fr
-    -  Frontage -> Frtg
+    -  Frontage -> Fr,Frtg
      -  Garden -> Gdn
-    -  Gardens -> Gdn
-    -  Gardens -> Gdns
-    -  Gate -> Ga
-    -  Gate -> Gte
-    -  Gates -> Ga
-    -  Gates -> Gte
-    -  Gateway -> Gwy
-    -  Gateway -> Gtwy
+    -  Gardens -> Gdn,Gdns
+    -  Gate,Gates -> Ga,Gte
+    -  Gateway -> Gwy,Gtwy
      -  George -> Geo
-    -  Glade -> Gl
-    -  Glade -> Gld
-    -  Glade -> Glde
+    -  Glade$ -> Gl,Gld,Glde
      -  Glen -> Gln
      -  Glens -> Glns
      -  Grange -> Gra
-    -  Green -> Gn
-    -  Green -> Grn
+    -  Green -> Gn,Grn
      -  Greens -> Grns
      -  Ground -> Grnd
-    -  Grove -> Gr
-    -  Grove -> Gro
-    -  Grove -> Grv
+    -  Grove$ -> Gr,Gro,Grv
      -  Groves -> Grvs
      -  Grovet -> Gr
      -  Gully -> Gly
-    -  Harbor -> Hbr
+    -  Harbor -> Hbr,Harbour
      -  Harbors -> Hbrs
-    -  Harbour -> Hbr
+    -  Harbour -> Hbr,Harbor
      -  Haven -> Hvn
      -  Head -> Hd
      -  Heads -> Hd
-    -  Heights -> Hgts
-    -  Heights -> Ht
-    -  Heights -> Hts
+    -  Heights -> Hgts,Ht,Hts
      -  High School -> HS
-    -  Highroad -> Hird
-    -  Highroad -> Hrd
+    -  Highroad -> Hird,Hrd
      -  Highway -> Hwy
      -  Hill -> Hl
-    -  Hills -> Hl
-    -  Hills -> Hls
+    -  Hills -> Hl,Hls
      -  Hollow -> Holw
      -  Hospital -> Hosp
-    -  House -> Ho
-    -  House -> Hse
+    -  House -> Ho,Hse
      -  Industrial -> Ind
      -  Inlet -> Inlt
      -  Interchange -> Intg
      -  International -> Intl
-    -  Island -> I
-    -  Island -> Is
+    -  Island -> I,Is
      -  Islands -> Iss
-    -  Junction -> Jct
-    -  Junction -> Jctn
-    -  Junction -> Jnc
+    -  Junction -> Jct,Jctn,Jnc
      -  Junctions -> Jcts
      -  Junior -> Jr
      -  Key -> Ky
@@ -260,40 +191,31 @@
      -  Knolls -> Knls
      -  Lagoon -> Lgn
      -  Lake -> Lk
-    -  Lakes -> L
-    -  Lakes -> Lks
-    -  Landing -> Ldg
-    -  Landing -> Lndg
-    -  Lane -> La
-    -  Lane -> Ln
+    -  Lakes -> L,Lks
+    -  Landing -> Ldg,Lndg
+    -  Lane -> La,Ln
      -  Laneway -> Lnwy
      -  Light -> Lgt
      -  Lights -> Lgts
      -  Line -> Ln
      -  Link -> Lk
-    -  Little -> Lit
-    -  Little -> Lt
+    -  Little -> Lit,Lt
      -  Loaf -> Lf
      -  Lock -> Lck
      -  Locks -> Lcks
      -  Lodge -> Ldg
      -  Lookout -> Lkt
      -  Loop -> Lp
-    -  Lower -> Low
-    -  Lower -> Lr
-    -  Lower -> Lwr
+    -  Lower -> Low,Lr,Lwr
      -  Mall -> Ml
      -  Manor -> Mnr
      -  Manors -> Mnrs
      -  Mansions -> Mans
      -  Market -> Mkt
      -  Meadow -> Mdw
-    -  Meadows -> Mdw
-    -  Meadows -> Mdws
+    -  Meadows -> Mdw,Mdws
      -  Mead -> Md
-    -  Meander -> Mdr
-    -  Meander -> Mndr
-    -  Meander -> Mr
+    -  Meander -> Mdr,Mndr,Mr
      -  Medical -> Med
      -  Memorial -> Mem
      -  Mews -> Mw
@@ -304,12 +226,10 @@
      -  Mill -> Ml
      -  Mills -> Mls
      -  Mission -> Msn
-    -  Motorway -> Mtwy
-    -  Motorway -> Mwy
+    -  Motorway -> Mtwy,Mwy
      -  Mount -> Mt
      -  Mountain -> Mtn
-    -  Mountains -> Mtn
-    -  Mountains -> Mtns
+    -  Mountains$ -> Mtn,Mtns
      -  Municipal -> Mun
      -  Museum -> Mus
      -  National Park -> NP
@@ -321,50 +241,37 @@
      -  Northeast -> NE
      -  Northwest -> NW
      -  Orchard -> Orch
-    -  Outlook -> Out
-    -  Outlook -> Otlk
+    -  Outlook -> Out,Otlk
      -  Overpass -> Opas
      -  Parade -> Pde
      -  Paradise -> Pdse
      -  Park -> Pk
      -  Parklands -> Pkld
-    -  Parkway -> Pkwy
-    -  Parkway -> Pky
-    -  Parkway -> Pwy
+    -  Parkway -> Pkwy,Pky,Pwy
      -  Parkways -> Pkwy
      -  Pass -> Ps
      -  Passage -> Psge
-    -  Pathway -> Phwy
-    -  Pathway -> Pway
-    -  Pathway -> Pwy
+    -  Pathway -> Phwy,Pway,Pwy
      -  Piazza -> Piaz
      -  Pike -> Pk
      -  Pine -> Pne
      -  Pines -> Pnes
      -  Place -> Pl
-    -  Plain -> Pl
-    -  Plain -> Pln
-    -  Plains -> Pl
-    -  Plains -> Plns
+    -  Plain -> Pl,Pln
+    -  Plains -> Pl,Plns
      -  Plateau -> Plat
-    -  Plaza -> Pl
-    -  Plaza -> Plz
-    -  Plaza -> Plza
+    -  Plaza -> Pl,Plz,Plza
      -  Pocket -> Pkt
-    -  Point -> Pnt
-    -  Point -> Pt
+    -  Point -> Pnt,Pt
      -  Points -> Pts
-    -  Port -> Prt
-    -  Port -> Pt
+    -  Port -> Prt,Pt
      -  Ports -> Prts
      -  Post Office -> PO
      -  Prairie -> Pr
      -  Precinct -> Pct
-    -  Promenade -> Prm
-    -  Promenade -> Prom
+    -  Promenade -> Prm,Prom
      -  Quadrangle -> Qdgl
-    -  Quadrant -> Qdrt
-    -  Quadrant -> Qd
+    -  Quadrant -> Qdrt,Qd
      -  Quay -> Qy
      -  Quays -> Qy
      -  Quays -> Qys
@@ -372,8 +279,7 @@
      -  Ramble -> Ra
      -  Ramble -> Rmbl
      -  Ranch -> Rnch
-    -  Range -> Rge
-    -  Range -> Rnge
+    -  Range -> Rge,Rnge
      -  Rapid -> Rpd
      -  Rapids -> Rpds
      -  Reach -> Rch
@@ -381,37 +287,31 @@
      -  Reserve -> Res
      -  Reservoir -> Res
      -  Rest -> Rst
-    -  Retreat -> Rt
-    -  Retreat -> Rtt
+    -  Retreat -> Rt,Rtt
      -  Return -> Rtn
-    -  Ridge -> Rdg
-    -  Ridge -> Rdge
+    -  Ridge -> Rdg,Rdge
      -  Ridges -> Rdgs
      -  Ridgeway -> Rgwy
      -  Right of Way -> Rowy
      -  Rise -> Ri
-    -  River -> R
-    -  River -> Riv
-    -  River -> Rvr
+    -  ^River -> R,Riv,Rvr
+    -  River$ -> R,Riv,Rvr
      -  Riverway -> Rvwy
      -  Riviera -> Rvra
      -  Road -> Rd
      -  Roads -> Rds
      -  Roadside -> Rdsd
-    -  Roadway -> Rdwy
-    -  Roadway -> Rdy
+    -  Roadway -> Rdwy,Rdy
      -  Rocks -> Rks
      -  Ronde -> Rnde
      -  Rosebowl -> Rsbl
      -  Rotary -> Rty
      -  Round -> Rnd
-    -  Route -> Rt
-    -  Route -> Rte
+    -  Route -> Rt,Rte
      -  Saint -> St
      -  Saints -> SS
      -  Senior -> Sr
-    -  Serviceway -> Swy
-    -  Serviceway -> Svwy
+    -  Serviceway -> Swy,Svwy
      -  Shoal -> Shl
      -  Shore -> Shr
      -  Shores -> Shrs
@@ -421,8 +321,7 @@
      -  Skyway -> Skwy
      -  Slope -> Slpe
      -  Sound -> Snd
-    -  South -> S
-    -  South -> Sth
+    -  South -> S,Sth
      -  Southeast -> SE
      -  Southwest -> SW
      -  Spring -> Spg
@@ -431,13 +330,10 @@
      -  Square -> Sq
      -  Squares -> Sqs
      -  Stairway -> Strwy
-    -  State Highway -> SH
-    -  State Highway -> SHwy
+    -  State Highway -> SH,SHwy
      -  State Route -> SR
-    -  Station -> Sta
-    -  Station -> Stn
-    -  Strand -> Sd
-    -  Strand -> Stra
+    -  Station -> Sta,Stn
+    -  Strand -> Sd,Stra
      -  Stravenue -> Stra
      -  Stream -> Strm
      -  Street -> St
@@ -447,61 +343,43 @@
      -  Summit -> Smt
      -  Tarn -> Tn
      -  Terminal -> Term
-    -  Terrace -> Tce
-    -  Terrace -> Ter
-    -  Terrace -> Terr
-    -  Thoroughfare -> Thfr
-    -  Thoroughfare -> Thor
+    -  Terrace -> Tce,Ter,Terr
+    -  Thoroughfare -> Thfr,Thor
      -  Throughway -> Trwy
-    -  Tollway -> Tlwy
-    -  Tollway -> Twy
+    -  Tollway -> Tlwy,Twy
      -  Towers -> Twrs
      -  Township -> Twp
      -  Trace -> Trce
-    -  Track -> Tr
-    -  Track -> Trak
-    -  Track -> Trk
+    -  Track -> Tr,Trak,Trk
      -  Trafficway -> Trfy
      -  Trail -> Trl
      -  Trailer -> Trlr
      -  Triangle -> Tri
      -  Trunkway -> Tkwy
-    -  Tunnel -> Tun
-    -  Tunnel -> Tunl
-    -  Turn -> Tn
-    -  Turn -> Trn
-    -  Turnpike -> Tpk
-    -  Turnpike -> Tpke
-    -  Underpass -> Upas
-    -  Underpass -> Ups
+    -  Tunnel -> Tun,Tunl
+    -  Turn -> Tn,Trn
+    -  Turnpike -> Tpk,Tpke
+    -  Underpass -> Upas,Ups
      -  Union -> Un
      -  Unions -> Uns
-    -  University -> Uni
-    -  University -> Univ
+    -  University -> Uni,Univ
      -  Upper -> Up
      -  Upper -> Upr
      -  Vale -> Va
      -  Valley -> Vly
      -  Valley -> Vy
      -  Valleys -> Vlys
-    -  Viaduct -> Vdct
-    -  Viaduct -> Via
-    -  Viaduct -> Viad
+    -  Viaduct$ -> Vdct,Via,Viad
      -  View -> Vw
      -  Views -> Vws
-    -  Village -> Vill
-    -  Village -> Vlg
+    -  Village -> Vill,Vlg
      -  Villages -> Vlgs
      -  Villas -> Vlls
      -  Ville -> Vl
-    -  Vista -> Vis
-    -  Vista -> Vst
-    -  Vista -> Vsta
-    -  Walk -> Wk
-    -  Walk -> Wlk
+    -  Vista -> Vis,Vst,Vsta
+    -  Walk -> Wk,Wlk
      -  Walks -> Walk
-    -  Walkway -> Wkwy
-    -  Walkway -> Wky
+    -  Walkway -> Wkwy,Wky
      -  Waters -> Wtr
      -  Way -> Wy
      -  Well -> Wl
diff --git a/src/nominatim_api/search/db_search_builder.py b/src/nominatim_api/search/db_search_builder.py

index c63803d21b6d10935c3cdc9f758caed48d91c308..0292335eb918391c296cb8d05735aeb82e5ea501 100644 (file)
--- a/src/nominatim_api/search/db_search_builder.py
+++ b/src/nominatim_api/search/db_search_builder.py
@@ -208,7 +208,7 @@ class SearchBuilder:
          addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
          addr_tokens = list({t.token for t in addr_partials})
  
-        exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))
+        exp_count = min(t.count for t in name_partials.values()) / (3**(len(name_partials) - 1))
  
          if (len(name_partials) > 3 or exp_count < 8000):
              yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
@@ -264,8 +264,6 @@ class SearchBuilder:
              address lookups will use the index, when the occurrences are not
              too many.
          """
-        # At this point drop unindexed partials from the address.
-        # This might yield wrong results, nothing we can do about that.
          if use_lookup:
              addr_restrict_tokens = []
              addr_lookup_tokens = [t.token for t in addr_partials]
diff --git a/src/nominatim_api/sql/sqlalchemy_types/geometry.py b/src/nominatim_api/sql/sqlalchemy_types/geometry.py

index 90adcce850ec6c7d82c1b41c8a32065e7a3b49e7..583568c45f8743e79488004e7e85110f34ffa0cf 100644 (file)
--- a/src/nominatim_api/sql/sqlalchemy_types/geometry.py
+++ b/src/nominatim_api/sql/sqlalchemy_types/geometry.py
@@ -173,7 +173,7 @@ class Geometry(types.UserDefinedType):  # type: ignore[type-arg]
      def __init__(self, subtype: str = 'Geometry'):
          self.subtype = subtype
  
-    def get_col_spec(self) -> str:
+    def get_col_spec(self, **_: Any) -> str:
          return f'GEOMETRY({self.subtype}, 4326)'
  
      def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
diff --git a/src/nominatim_db/tokenizer/icu_tokenizer.py b/src/nominatim_db/tokenizer/icu_tokenizer.py

index 858cb64c63ef4843a49d42da20fa0c2f65fc05ed..19b838639ab0e557a7cba97fbe5e012a9bf81b70 100644 (file)
--- a/src/nominatim_db/tokenizer/icu_tokenizer.py
+++ b/src/nominatim_db/tokenizer/icu_tokenizer.py
@@ -121,10 +121,10 @@ class ICUTokenizer(AbstractTokenizer):
                             SELECT unnest(nameaddress_vector) as id, count(*)
                                   FROM search_name GROUP BY id)
                    SELECT coalesce(a.id, w.id) as id,
-                         (CASE WHEN w.count is null THEN '{}'::JSONB
+                         (CASE WHEN w.count is null or w.count <= 1 THEN '{}'::JSONB
                                ELSE jsonb_build_object('count', w.count) END
                            ||
-                          CASE WHEN a.count is null THEN '{}'::JSONB
+                          CASE WHEN a.count is null or a.count <= 1 THEN '{}'::JSONB
                                ELSE jsonb_build_object('addr_count', a.count) END) as info
                    FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
                    """)
@@ -134,9 +134,10 @@ class ICUTokenizer(AbstractTokenizer):
                  drop_tables(conn, 'tmp_word')
                  cur.execute("""CREATE TABLE tmp_word AS
                                  SELECT word_id, word_token, type, word,
-                                       (CASE WHEN wf.info is null THEN word.info
-                                        ELSE coalesce(word.info, '{}'::jsonb) || wf.info
-                                        END) as info
+                                       coalesce(word.info, '{}'::jsonb)
+                                       - 'count' - 'addr_count' ||
+                                       coalesce(wf.info, '{}'::jsonb)
+                                       as info
                                  FROM word LEFT JOIN word_frequencies wf
                                       ON word.word_id = wf.id
                                  ORDER BY word_id
@@ -585,10 +586,14 @@ class ICUNameAnalyzer(AbstractAnalyzer):
              if word_id:
                  result = self._cache.housenumbers.get(word_id, result)
                  if result[0] is None:
-                    variants = analyzer.compute_variants(word_id)
+                    varout = analyzer.compute_variants(word_id)
+                    if isinstance(varout, tuple):
+                        variants = varout[0]
+                    else:
+                        variants = varout
                      if variants:
                          hid = execute_scalar(self.conn, "SELECT create_analyzed_hnr_id(%s, %s)",
-                                             (word_id, list(variants)))
+                                             (word_id, variants))
                          result = hid, variants[0]
                          self._cache.housenumbers[word_id] = result
  
@@ -633,13 +638,17 @@ class ICUNameAnalyzer(AbstractAnalyzer):
  
              full, part = self._cache.names.get(token_id, (None, None))
              if full is None:
-                variants = analyzer.compute_variants(word_id)
+                varset = analyzer.compute_variants(word_id)
+                if isinstance(varset, tuple):
+                    variants, lookups = varset
+                else:
+                    variants, lookups = varset, None
                  if not variants:
                      continue
  
                  with self.conn.cursor() as cur:
-                    cur.execute("SELECT * FROM getorcreate_full_word(%s, %s)",
-                                (token_id, variants))
+                    cur.execute("SELECT * FROM getorcreate_full_word(%s, %s, %s)",
+                                (token_id, variants, lookups))
                      full, part = cast(Tuple[int, List[int]], cur.fetchone())
  
                  self._cache.names[token_id] = (full, part)
diff --git a/src/nominatim_db/tokenizer/token_analysis/base.py b/src/nominatim_db/tokenizer/token_analysis/base.py

index 52ee801343fb6c29d2425dd356c0b9df495a745b..186f1d3ebc1113575a41929b605cfc32d8f5ecb2 100644 (file)
--- a/src/nominatim_db/tokenizer/token_analysis/base.py
+++ b/src/nominatim_db/tokenizer/token_analysis/base.py
@@ -7,7 +7,7 @@
  """
  Common data types and protocols for analysers.
  """
-from typing import Mapping, List, Any
+from typing import Mapping, List, Any, Union, Tuple
  
  from ...typing import Protocol
  from ...data.place_name import PlaceName
@@ -33,7 +33,7 @@ class Analyzer(Protocol):
                      for example because the character set in use does not match.
          """
  
-    def compute_variants(self, canonical_id: str) -> List[str]:
+    def compute_variants(self, canonical_id: str) -> Union[List[str], Tuple[List[str], List[str]]]:
          """ Compute the transliterated spelling variants for the given
              canonical ID.
  
diff --git a/src/nominatim_db/tokenizer/token_analysis/generic.py b/src/nominatim_db/tokenizer/token_analysis/generic.py

index fa9dc4dfa54c66e6f25a408129ba327d228b38b0..b01cebf75e78081fbd74a91966cfe3a3876da36a 100644 (file)
--- a/src/nominatim_db/tokenizer/token_analysis/generic.py
+++ b/src/nominatim_db/tokenizer/token_analysis/generic.py
@@ -7,7 +7,7 @@
  """
  Generic processor for names that creates abbreviation variants.
  """
-from typing import Mapping, Dict, Any, Iterable, Iterator, Optional, List, cast
+from typing import Mapping, Dict, Any, Iterable, Optional, List, cast, Tuple
  import itertools
  
  from ...errors import UsageError
@@ -78,7 +78,7 @@ class GenericTokenAnalysis:
          """
          return cast(str, self.norm.transliterate(name.name)).strip()
  
-    def compute_variants(self, norm_name: str) -> List[str]:
+    def compute_variants(self, norm_name: str) -> Tuple[List[str], List[str]]:
          """ Compute the spelling variants for the given normalized name
              and transliterate the result.
          """
@@ -87,18 +87,20 @@ class GenericTokenAnalysis:
          for mutation in self.mutations:
              variants = mutation.generate(variants)
  
-        return [name for name in self._transliterate_unique_list(norm_name, variants) if name]
-
-    def _transliterate_unique_list(self, norm_name: str,
-                                   iterable: Iterable[str]) -> Iterator[Optional[str]]:
-        seen = set()
+        varset = set(map(str.strip, variants))
          if self.variant_only:
-            seen.add(norm_name)
+            varset.discard(norm_name)
+
+        trans = []
+        norm = []
+
+        for var in varset:
+            t = self.to_ascii.transliterate(var).strip()
+            if t:
+                trans.append(t)
+                norm.append(var)
  
-        for variant in map(str.strip, iterable):
-            if variant not in seen:
-                seen.add(variant)
-                yield self.to_ascii.transliterate(variant).strip()
+        return trans, norm
  
      def _generate_word_variants(self, norm_name: str) -> Iterable[str]:
          baseform = '^ ' + norm_name + ' ^'
diff --git a/test/python/tokenizer/test_icu.py b/test/python/tokenizer/test_icu.py

index ce00281cff7e09e850fb4d5ee957fb687f65d809..12cef894f863cb2336b1be26240e28fb4a8ba28e 100644 (file)
--- a/test/python/tokenizer/test_icu.py
+++ b/test/python/tokenizer/test_icu.py
@@ -230,19 +230,20 @@ def test_update_statistics(word_table, table_factory, temp_db_cursor,
                             tokenizer_factory, test_config):
      word_table.add_full_word(1000, 'hello')
      word_table.add_full_word(1001, 'bye')
+    word_table.add_full_word(1002, 'town')
      table_factory('search_name',
                    'place_id BIGINT, name_vector INT[], nameaddress_vector INT[]',
-                  [(12, [1000], [1001])])
+                  [(12, [1000], [1001]), (13, [1001], [1002]), (14, [1000, 1001], [1002])])
      tok = tokenizer_factory()
  
      tok.update_statistics(test_config)
  
-    assert temp_db_cursor.scalar("""SELECT count(*) FROM word
-                                    WHERE type = 'W' and word_id = 1000 and
-                                          (info->>'count')::int > 0""") == 1
-    assert temp_db_cursor.scalar("""SELECT count(*) FROM word
-                                    WHERE type = 'W' and word_id = 1001 and
-                                          (info->>'addr_count')::int > 0""") == 1
+    assert temp_db_cursor.row_set("""SELECT word_id,
+                                            (info->>'count')::int,
+                                            (info->>'addr_count')::int
+                                     FROM word
+                                     WHERE type = 'W'""") == \
+        {(1000, 2, None), (1001, 2, None), (1002, None, 2)}
  
  
  def test_normalize_postcode(analyzer):
diff --git a/test/python/tokenizer/token_analysis/test_generic.py b/test/python/tokenizer/token_analysis/test_generic.py

index 02870f2445e7b798a7bc29ffa5aa794d2dd1a9a1..48f2483bc8e54cfe2a6ceed3d89a2efe14c5561e 100644 (file)
--- a/test/python/tokenizer/token_analysis/test_generic.py
+++ b/test/python/tokenizer/token_analysis/test_generic.py
@@ -40,7 +40,7 @@ def make_analyser(*variants, variant_only=False):
  
  def get_normalized_variants(proc, name):
      norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
-    return proc.compute_variants(norm.transliterate(name).strip())
+    return proc.compute_variants(norm.transliterate(name).strip())[0]
  
  
  def test_no_variants():
diff --git a/test/python/tokenizer/token_analysis/test_generic_mutation.py b/test/python/tokenizer/token_analysis/test_generic_mutation.py

index 2ce2236a3c490762c965c42a75cdcc4344661c54..e0507e4c30ace40f5d4ead0c0889b10cb8eddb6b 100644 (file)
--- a/test/python/tokenizer/token_analysis/test_generic_mutation.py
+++ b/test/python/tokenizer/token_analysis/test_generic_mutation.py
@@ -40,7 +40,7 @@ class TestMutationNoVariants:
  
      def variants(self, name):
          norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
-        return set(self.analysis.compute_variants(norm.transliterate(name).strip()))
+        return set(self.analysis.compute_variants(norm.transliterate(name).strip())[0])
  
      @pytest.mark.parametrize('pattern', ('(capture)', ['a list']))
      def test_bad_pattern(self, pattern):
author	Sarah Hoffmann <lonvia@denofr.de>
	Mon, 31 Mar 2025 15:14:19 +0000 (17:14 +0200)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Mon, 31 Mar 2025 15:14:19 +0000 (17:14 +0200)
lib-sql/functions.sql		patch \| blob \| history
lib-sql/functions/address_lookup.sql	[deleted file]	patch \| blob \| history
lib-sql/tokenizer/icu_tokenizer.sql		patch \| blob \| history
settings/icu-rules/variants-en.yaml		patch \| blob \| history
src/nominatim_api/search/db_search_builder.py		patch \| blob \| history
src/nominatim_api/sql/sqlalchemy_types/geometry.py		patch \| blob \| history
src/nominatim_db/tokenizer/icu_tokenizer.py		patch \| blob \| history
src/nominatim_db/tokenizer/token_analysis/base.py		patch \| blob \| history
src/nominatim_db/tokenizer/token_analysis/generic.py		patch \| blob \| history
test/python/tokenizer/test_icu.py		patch \| blob \| history
test/python/tokenizer/token_analysis/test_generic.py		patch \| blob \| history
test/python/tokenizer/token_analysis/test_generic_mutation.py		patch \| blob \| history