From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 2 Apr 2025 10:01:50 +0000 (+0200)
Subject: release 5.1.0post2
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/HEAD?ds=inline;hp=0a7624039bc4189fd999eb23ea5a175a1c4b2dfb

release 5.1.0post2
---

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 311414fe..6c90cd3c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -113,3 +113,5 @@ Checklist for releases:
   * run `nominatim --version` to confirm correct version
 * [ ] tag new release and add a release on github.com
 * [ ] build pip packages and upload to pypi
+  * `make build`
+  * `twine upload dist/*`
diff --git a/ChangeLog b/ChangeLog
index 9ffe4038..dff198eb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+5.1.0
+ * replace datrie with simple internal trie implementation
+ * add pattern-based postcode parser for queries,
+   postcodes no longer need to be present in OSM to be found
+ * take variants into account when computing token similarity
+ * add extratags output to geocodejson format
+ * fix default layer setting used for structured queries
+ * update abbreviation lists for Russian and English
+   (thanks @shoorick, @IvanShift, @mhsrn21)
+ * fix variant generation for Norwegian
+ * fix normalization around space-like characters
+ * improve postcode search and handling of postcodes in queries
+ * reorganise internal query structure and get rid of slow enums
+ * enable code linting for tests
+ * various code modernisations in test code (thanks @eumiro)
+ * remove setting osm2pgsql location via config.lib_dir
+ * make SQL functions parallel safe as far as possible (thanks @otbutz)
+ * various fixes and improvements to documentation (thanks @TuringVerified)
+
 5.0.0
  * increase required versions for PostgreSQL (12+), PostGIS (3.0+)
  * remove installation via cmake and debundle osm2pgsql
diff --git a/SECURITY.md b/SECURITY.md
index e3660bcd..98295e1f 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -9,7 +9,8 @@ versions.
 
 | Version | End of support for security updates |
 | ------- | ----------------------------------- |
-| 5.0.x   | 2027-02-06
+| 5.1.x   | 2027-04-01                          |
+| 5.0.x   | 2027-02-06                          |
 | 4.5.x   | 2026-09-12                          |
 | 4.4.x   | 2026-03-07                          |
 | 4.3.x   | 2025-09-07                          |
diff --git a/docs/customize/Settings.md b/docs/customize/Settings.md
index 94726ca7..edf2241b 100644
--- a/docs/customize/Settings.md
+++ b/docs/customize/Settings.md
@@ -602,6 +602,43 @@ results gathered so far.
 Note that under high load you may observe that users receive different results
 than usual without seeing an error. This may cause some confusion.
 
+#### NOMINATIM_OUTPUT_NAMES
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Specifies order of name tags |
+| **Format:**        | string: comma-separated list of tag names |
+| **Default:**       | name:XX,name,brand,official_name:XX,short_name:XX,official_name,short_name,ref |
+
+Specifies the order in which different name tags are used.
+The values in this list determine the preferred order of name variants,
+including language-specific names (in OSM: the name tag with and without any language suffix).
+
+Comma-separated list, where :XX stands for a language suffix
+(e.g. name:en) and a tag without :XX is used as is (e.g. name).
+
+See also [NOMINATIM_DEFAULT_LANGUAGE](#nominatim_default_language).
+
+!!! note
+    If NOMINATIM_OUTPUT_NAMES = `name:XX,name,short_name:XX,short_name` the search follows
+
+        ```
+        'name', 'short_name'
+        ```
+
+    if we have no preferred language order for showing search results.
+
+    For languages ['en', 'es'] the search follows
+
+        ```
+        'name:en', 'name:es',
+        'name',
+        'short_name:en', 'short_name:es',
+        'short_name'
+        ```
+
+    For those familiar with the internal implementation, the `_place_*` expansion is added, but to simplify, it is not included in this example.
+
 ### Logging Settings
 
 #### NOMINATIM_LOG_DB
diff --git a/docs/customize/Tokenizers.md b/docs/customize/Tokenizers.md
index d290c148..23db34c9 100644
--- a/docs/customize/Tokenizers.md
+++ b/docs/customize/Tokenizers.md
@@ -67,7 +67,13 @@ Here is an example configuration file:
 
 ``` yaml
 query-preprocessing:
-    - normalize
+    - step: split_japanese_phrases
+    - step: regex_replace
+      replacements:
+        - pattern: https?://[^\s]* # Filter URLs starting with http or https
+          replace: ''
+    - step: normalize
+
 normalization:
     - ":: lower ()"
     - "ß > 'ss'" # German szet is unambiguously equal to double ss
@@ -88,8 +94,8 @@ token-analysis:
             replacements: ['ä', 'ae']
 ```
 
-The configuration file contains four sections:
-`normalization`, `transliteration`, `sanitizers` and `token-analysis`.
+The configuration file contains five sections:
+`query-preprocessing`, `normalization`, `transliteration`, `sanitizers` and `token-analysis`.
 
 #### Query preprocessing
 
@@ -106,6 +112,19 @@ The following is a list of preprocessors that are shipped with Nominatim.
         heading_level: 6
         docstring_section_style: spacy
 
+##### regex-replace
+
+::: nominatim_api.query_preprocessing.regex_replace
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+    description: 
+        This option runs any given regex pattern on the input and replaces values accordingly
+    replacements:
+        - pattern: regex pattern
+          replace: string to replace with
+
 
 #### Normalization and Transliteration
 
diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md
index 9ade7916..709f9b7d 100644
--- a/docs/develop/Development-Environment.md
+++ b/docs/develop/Development-Environment.md
@@ -69,9 +69,9 @@ To set up the virtual environment with all necessary packages run:
 ```sh
 virtualenv ~/nominatim-dev-venv
 ~/nominatim-dev-venv/bin/pip install\
-    psutil psycopg[binary] PyICU SQLAlchemy \
+    psutil 'psycopg[binary]' PyICU SQLAlchemy \
     python-dotenv jinja2 pyYAML behave \
-    mkdocs mkdocstrings mkdocs-gen-files pytest pytest-asyncio flake8 \
+    mkdocs 'mkdocstrings[python]' mkdocs-gen-files pytest pytest-asyncio flake8 \
     types-jinja2 types-markupsafe types-psutil types-psycopg2 \
     types-pygments types-pyyaml types-requests types-ujson \
     types-urllib3 typing-extensions unicorn falcon starlette \
diff --git a/lib-sql/functions.sql b/lib-sql/functions.sql
index 158969d9..737a3f21 100644
--- a/lib-sql/functions.sql
+++ b/lib-sql/functions.sql
@@ -8,7 +8,6 @@
 {% include('functions/utils.sql') %}
 {% include('functions/ranking.sql') %}
 {% include('functions/importance.sql') %}
-{% include('functions/address_lookup.sql') %}
 {% include('functions/interpolation.sql') %}
 
 {% if 'place' in db.tables %}
diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql
deleted file mode 100644
index b59b7656..00000000
--- a/lib-sql/functions/address_lookup.sql
+++ /dev/null
@@ -1,334 +0,0 @@
--- SPDX-License-Identifier: GPL-2.0-only
---
--- This file is part of Nominatim. (https://nominatim.org)
---
--- Copyright (C) 2022 by the Nominatim developer community.
--- For a full list of authors see the git log.
-
--- Functions for returning address information for a place.
-
-DROP TYPE IF EXISTS addressline CASCADE;
-CREATE TYPE addressline as (
-  place_id BIGINT,
-  osm_type CHAR(1),
-  osm_id BIGINT,
-  name HSTORE,
-  class TEXT,
-  type TEXT,
-  place_type TEXT,
-  admin_level INTEGER,
-  fromarea BOOLEAN,
-  isaddress BOOLEAN,
-  rank_address INTEGER,
-  distance FLOAT
-);
-
-
-CREATE OR REPLACE FUNCTION get_name_by_language(name hstore, languagepref TEXT[])
-  RETURNS TEXT
-  AS $$
-DECLARE
-  result TEXT;
-BEGIN
-  IF name is null THEN
-    RETURN null;
-  END IF;
-
-  FOR j IN 1..array_upper(languagepref,1) LOOP
-    IF name ? languagepref[j] THEN
-      result := trim(name->languagepref[j]);
-      IF result != '' THEN
-        return result;
-      END IF;
-    END IF;
-  END LOOP;
-
-  -- as a fallback - take the last element since it is the default name
-  RETURN trim((avals(name))[array_length(avals(name), 1)]);
-END;
-$$
-LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
-
-
---housenumber only needed for tiger data
-CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT,
-                                                   housenumber INTEGER,
-                                                   languagepref TEXT[])
-  RETURNS TEXT
-  AS $$
-DECLARE
-  result TEXT[];
-  currresult TEXT;
-  prevresult TEXT;
-  location RECORD;
-BEGIN
-
-  result := '{}';
-  prevresult := '';
-
-  FOR location IN
-    SELECT name,
-       CASE WHEN place_id = for_place_id THEN 99 ELSE rank_address END as rank_address
-    FROM get_addressdata(for_place_id, housenumber)
-    WHERE isaddress order by rank_address desc
-  LOOP
-    currresult := trim(get_name_by_language(location.name, languagepref));
-    IF currresult != prevresult AND currresult IS NOT NULL
-       AND result[(100 - location.rank_address)] IS NULL
-    THEN
-      result[(100 - location.rank_address)] := currresult;
-      prevresult := currresult;
-    END IF;
-  END LOOP;
-
-  RETURN array_to_string(result,', ');
-END;
-$$
-LANGUAGE plpgsql STABLE PARALLEL SAFE;
-
-DROP TYPE IF EXISTS addressdata_place;
-CREATE TYPE addressdata_place AS (
-  place_id BIGINT,
-  country_code VARCHAR(2),
-  housenumber TEXT,
-  postcode TEXT,
-  class TEXT,
-  type TEXT,
-  name HSTORE,
-  address HSTORE,
-  centroid GEOMETRY
-);
-
--- Compute the list of address parts for the given place.
---
--- If in_housenumber is greator or equal 0, look for an interpolation.
-CREATE OR REPLACE FUNCTION get_addressdata(in_place_id BIGINT, in_housenumber INTEGER)
-  RETURNS setof addressline
-  AS $$
-DECLARE
-  place addressdata_place;
-  location RECORD;
-  country RECORD;
-  current_rank_address INTEGER;
-  location_isaddress BOOLEAN;
-BEGIN
-  -- The place in question might not have a direct entry in place_addressline.
-  -- Look for the parent of such places then and save it in place.
-
-  -- first query osmline (interpolation lines)
-  IF in_housenumber >= 0 THEN
-    SELECT parent_place_id as place_id, country_code,
-           in_housenumber as housenumber, postcode,
-           'place' as class, 'house' as type,
-           null as name, null as address,
-           ST_Centroid(linegeo) as centroid
-      INTO place
-      FROM location_property_osmline
-      WHERE place_id = in_place_id
-            AND in_housenumber between startnumber and endnumber;
-  END IF;
-
-  --then query tiger data
-  {% if config.get_bool('USE_US_TIGER_DATA') %}
-  IF place IS NULL AND in_housenumber >= 0 THEN
-    SELECT parent_place_id as place_id, 'us' as country_code,
-           in_housenumber as housenumber, postcode,
-           'place' as class, 'house' as type,
-           null as name, null as address,
-           ST_Centroid(linegeo) as centroid
-      INTO place
-      FROM location_property_tiger
-      WHERE place_id = in_place_id
-            AND in_housenumber between startnumber and endnumber;
-  END IF;
-  {% endif %}
-
-  -- postcode table
-  IF place IS NULL THEN
-    SELECT parent_place_id as place_id, country_code,
-           null::text as housenumber, postcode,
-           'place' as class, 'postcode' as type,
-           null as name, null as address,
-           null as centroid
-      INTO place
-      FROM location_postcode
-      WHERE place_id = in_place_id;
-  END IF;
-
-  -- POI objects in the placex table
-  IF place IS NULL THEN
-    SELECT parent_place_id as place_id, country_code,
-           coalesce(address->'housenumber',
-                    address->'streetnumber',
-                    address->'conscriptionnumber')::text as housenumber,
-           postcode,
-           class, type,
-           name, address,
-           centroid
-      INTO place
-      FROM placex
-      WHERE place_id = in_place_id and rank_search > 27;
-  END IF;
-
-  -- If place is still NULL at this point then the object has its own
-  -- entry in place_address line. However, still check if there is not linked
-  -- place we should be using instead.
-  IF place IS NULL THEN
-    select coalesce(linked_place_id, place_id) as place_id,  country_code,
-           null::text as housenumber, postcode,
-           class, type,
-           null as name, address,
-           null as centroid
-      INTO place
-      FROM placex where place_id = in_place_id;
-  END IF;
-
---RAISE WARNING '% % % %',searchcountrycode, searchhousenumber, searchpostcode;
-
-  -- --- Return the record for the base entry.
-
-  current_rank_address := 1000;
-  FOR location IN
-    SELECT placex.place_id, osm_type, osm_id, name,
-           coalesce(extratags->'linked_place', extratags->'place') as place_type,
-           class, type, admin_level,
-           CASE WHEN rank_address = 0 THEN 100
-                WHEN rank_address = 11 THEN 5
-                ELSE rank_address END as rank_address,
-           country_code
-      FROM placex
-      WHERE place_id = place.place_id
-  LOOP
---RAISE WARNING '%',location;
-    -- mix in default names for countries
-    IF location.rank_address = 4 and place.country_code is not NULL THEN
-      FOR country IN
-        SELECT coalesce(name, ''::hstore) as name FROM country_name
-          WHERE country_code = place.country_code LIMIT 1
-      LOOP
-        place.name := country.name || place.name;
-      END LOOP;
-    END IF;
-
-    IF location.rank_address < 4 THEN
-      -- no country locations for ranks higher than country
-      place.country_code := NULL::varchar(2);
-    ELSEIF place.country_code IS NULL AND location.country_code IS NOT NULL THEN
-      place.country_code := location.country_code;
-    END IF;
-
-    RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id,
-                    location.name, location.class, location.type,
-                    location.place_type,
-                    location.admin_level, true,
-                    location.type not in ('postcode', 'postal_code'),
-                    location.rank_address, 0)::addressline;
-
-    current_rank_address := location.rank_address;
-  END LOOP;
-
-  -- --- Return records for address parts.
-
-  FOR location IN
-    SELECT placex.place_id, osm_type, osm_id, name, class, type,
-           coalesce(extratags->'linked_place', extratags->'place') as place_type,
-           admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress,
-           CASE WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address,
-           distance, country_code, postcode
-      FROM place_addressline join placex on (address_place_id = placex.place_id)
-      WHERE place_addressline.place_id IN (place.place_id, in_place_id)
-            AND linked_place_id is null
-            AND (placex.country_code IS NULL OR place.country_code IS NULL
-                 OR placex.country_code = place.country_code)
-      ORDER BY rank_address desc,
-               (place_addressline.place_id = in_place_id) desc,
-               (CASE WHEN coalesce((avals(name) && avals(place.address)), False) THEN 2
-                     WHEN isaddress THEN 0
-                     WHEN fromarea
-                          and place.centroid is not null
-                          and ST_Contains(geometry, place.centroid) THEN 1
-                     ELSE -1 END) desc,
-               fromarea desc, distance asc, rank_search desc
-  LOOP
-    -- RAISE WARNING '%',location;
-    location_isaddress := location.rank_address != current_rank_address;
-
-    IF place.country_code IS NULL AND location.country_code IS NOT NULL THEN
-      place.country_code := location.country_code;
-    END IF;
-    IF location.type in ('postcode', 'postal_code')
-       AND place.postcode is not null
-    THEN
-      -- If the place had a postcode assigned, take this one only
-      -- into consideration when it is an area and the place does not have
-      -- a postcode itself.
-      IF location.fromarea AND location_isaddress
-         AND (place.address is null or not place.address ? 'postcode')
-      THEN
-        place.postcode := null; -- remove the less exact postcode
-      ELSE
-        location_isaddress := false;
-      END IF;
-    END IF;
-    RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id,
-                    location.name, location.class, location.type,
-                    location.place_type,
-                    location.admin_level, location.fromarea,
-                    location_isaddress,
-                    location.rank_address,
-                    location.distance)::addressline;
-
-    current_rank_address := location.rank_address;
-  END LOOP;
-
-  -- If no country was included yet, add the name information from country_name.
-  IF current_rank_address > 4 THEN
-    FOR location IN
-      SELECT name || coalesce(derived_name, ''::hstore) as name FROM country_name
-        WHERE country_code = place.country_code LIMIT 1
-    LOOP
---RAISE WARNING '% % %',current_rank_address,searchcountrycode,countryname;
-      RETURN NEXT ROW(null, null, null, location.name, 'place', 'country', NULL,
-                      null, true, true, 4, 0)::addressline;
-    END LOOP;
-  END IF;
-
-  -- Finally add some artificial rows.
-  IF place.country_code IS NOT NULL THEN
-    location := ROW(null, null, null, hstore('ref', place.country_code),
-                    'place', 'country_code', null, null, true, false, 4, 0)::addressline;
-    RETURN NEXT location;
-  END IF;
-
-  IF place.name IS NOT NULL THEN
-    location := ROW(in_place_id, null, null, place.name, place.class,
-                    place.type, null, null, true, true, 29, 0)::addressline;
-    RETURN NEXT location;
-  END IF;
-
-  IF place.housenumber IS NOT NULL THEN
-    location := ROW(null, null, null, hstore('ref', place.housenumber),
-                    'place', 'house_number', null, null, true, true, 28, 0)::addressline;
-    RETURN NEXT location;
-  END IF;
-
-  IF place.address is not null and place.address ? '_unlisted_place' THEN
-    RETURN NEXT ROW(null, null, null, hstore('name', place.address->'_unlisted_place'),
-                    'place', 'locality', null, null, true, true, 25, 0)::addressline;
-  END IF;
-
-  IF place.postcode is not null THEN
-    location := ROW(null, null, null, hstore('ref', place.postcode), 'place',
-                    'postcode', null, null, false, true, 5, 0)::addressline;
-    RETURN NEXT location;
-  ELSEIF place.address is not null and place.address ? 'postcode'
-         and not place.address->'postcode' SIMILAR TO '%(,|;)%' THEN
-    location := ROW(null, null, null, hstore('ref', place.address->'postcode'), 'place',
-                    'postcode', null, null, false, true, 5, 0)::addressline;
-    RETURN NEXT location;
-  END IF;
-
-  RETURN;
-END;
-$$
-LANGUAGE plpgsql STABLE PARALLEL SAFE;
diff --git a/lib-sql/tokenizer/icu_tokenizer.sql b/lib-sql/tokenizer/icu_tokenizer.sql
index f0c30f1b..8cf13120 100644
--- a/lib-sql/tokenizer/icu_tokenizer.sql
+++ b/lib-sql/tokenizer/icu_tokenizer.sql
@@ -128,16 +128,14 @@ DECLARE
   partial_terms TEXT[] = '{}'::TEXT[];
   term TEXT;
   term_id INTEGER;
-  term_count INTEGER;
 BEGIN
   SELECT min(word_id) INTO full_token
     FROM word WHERE word = norm_term and type = 'W';
 
   IF full_token IS NULL THEN
     full_token := nextval('seq_word');
-    INSERT INTO word (word_id, word_token, type, word, info)
-      SELECT full_token, lookup_term, 'W', norm_term,
-             json_build_object('count', 0)
+    INSERT INTO word (word_id, word_token, type, word)
+      SELECT full_token, lookup_term, 'W', norm_term
         FROM unnest(lookup_terms) as lookup_term;
   END IF;
 
@@ -150,14 +148,67 @@ BEGIN
 
   partial_tokens := '{}'::INT[];
   FOR term IN SELECT unnest(partial_terms) LOOP
-    SELECT min(word_id), max(info->>'count') INTO term_id, term_count
+    SELECT min(word_id) INTO term_id
+      FROM word WHERE word_token = term and type = 'w';
+
+    IF term_id IS NULL THEN
+      term_id := nextval('seq_word');
+      INSERT INTO word (word_id, word_token, type)
+        VALUES (term_id, term, 'w');
+    END IF;
+
+    partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
+  END LOOP;
+END;
+$$
+LANGUAGE plpgsql;
+
+
+CREATE OR REPLACE FUNCTION getorcreate_full_word(norm_term TEXT,
+                                                 lookup_terms TEXT[],
+                                                 lookup_norm_terms TEXT[],
+                                                 OUT full_token INT,
+                                                 OUT partial_tokens INT[])
+  AS $$
+DECLARE
+  partial_terms TEXT[] = '{}'::TEXT[];
+  term TEXT;
+  term_id INTEGER;
+BEGIN
+  SELECT min(word_id) INTO full_token
+    FROM word WHERE word = norm_term and type = 'W';
+
+  IF full_token IS NULL THEN
+    full_token := nextval('seq_word');
+    IF lookup_norm_terms IS NULL THEN
+      INSERT INTO word (word_id, word_token, type, word)
+        SELECT full_token, lookup_term, 'W', norm_term
+          FROM unnest(lookup_terms) as lookup_term;
+    ELSE
+      INSERT INTO word (word_id, word_token, type, word, info)
+        SELECT full_token, t.lookup, 'W', norm_term,
+               CASE WHEN norm_term = t.norm THEN null
+               ELSE json_build_object('lookup', t.norm) END
+          FROM unnest(lookup_terms, lookup_norm_terms) as t(lookup, norm);
+    END IF;
+  END IF;
+
+  FOR term IN SELECT unnest(string_to_array(unnest(lookup_terms), ' ')) LOOP
+    term := trim(term);
+    IF NOT (ARRAY[term] <@ partial_terms) THEN
+      partial_terms := partial_terms || term;
+    END IF;
+  END LOOP;
+
+  partial_tokens := '{}'::INT[];
+  FOR term IN SELECT unnest(partial_terms) LOOP
+    SELECT min(word_id) INTO term_id
       FROM word WHERE word_token = term and type = 'w';
 
     IF term_id IS NULL THEN
       term_id := nextval('seq_word');
-      term_count := 0;
-      INSERT INTO word (word_id, word_token, type, info)
-        VALUES (term_id, term, 'w', json_build_object('count', term_count));
+      INSERT INTO word (word_id, word_token, type)
+        VALUES (term_id, term, 'w');
     END IF;
 
     partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
diff --git a/packaging/nominatim-api/pyproject.toml b/packaging/nominatim-api/pyproject.toml
index 601029ca..ab1ed080 100644
--- a/packaging/nominatim-api/pyproject.toml
+++ b/packaging/nominatim-api/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "nominatim-api"
-version = "5.0.0.post7"
+version = "5.1.0.post2"
 description = "A tool for building a database of OpenStreetMap for geocoding and for searching the database. Search library."
 readme = "README.md"
 requires-python = ">=3.7"
@@ -16,7 +16,7 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-    "SQLAlchemy==2.0.39",
+    "SQLAlchemy==2.0.40",
     "falcon==4.0.2",
     "uvicorn==0.34.0",
     "gunicorn==23.0.0"
diff --git a/packaging/nominatim-db/pyproject.toml b/packaging/nominatim-db/pyproject.toml
index 0c9a7055..88ac096b 100644
--- a/packaging/nominatim-db/pyproject.toml
+++ b/packaging/nominatim-db/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "nominatim-db"
-version = "5.0.0.post7"
+version = "5.1.0.post2"
 description = "A tool for building a database of OpenStreetMap for geocoding and for searching the database. Database backend."
 readme = "README.md"
 requires-python = ">=3.7"
@@ -17,11 +17,11 @@ classifiers = [
 ]
 dependencies = [
     "psycopg[binary]==3.2.6",
-    "python-dotenv==1.0.1",
+    "python-dotenv==1.1.0",
     "jinja2==3.1.6",
     "pyYAML==6.0.2",
     "psutil==7.0.0",
-    "PyICU==2.14",
+    "PyICU==2.15",
     "osmium==4.0.2",
 ]
 
diff --git a/settings/env.defaults b/settings/env.defaults
index b8c66667..3ebb288f 100644
--- a/settings/env.defaults
+++ b/settings/env.defaults
@@ -192,6 +192,13 @@ NOMINATIM_REQUEST_TIMEOUT=60
 # to geocode" instead.
 NOMINATIM_SEARCH_WITHIN_COUNTRIES=False
 
+# Specifies the order in which different name tags are used.
+# The values in this list determine the preferred order of name variants,
+# including language-specific names.
+# Comma-separated list, where :XX stands for language-specific tags
+# (e.g. name:en) and no :XX stands for general tags (e.g. name).
+NOMINATIM_OUTPUT_NAMES=name:XX,name,brand,official_name:XX,short_name:XX,official_name,short_name,ref
+
 ### Log settings
 #
 # The following options allow to enable logging of API requests.
diff --git a/settings/icu-rules/variants-en.yaml b/settings/icu-rules/variants-en.yaml
index 99cd6da6..54a7b475 100644
--- a/settings/icu-rules/variants-en.yaml
+++ b/settings/icu-rules/variants-en.yaml
@@ -6,25 +6,19 @@
     -  Air Force Base -> AFB
     -  Air National Guard Base -> ANGB
     -  Airport -> Aprt
-    -  Alley -> Al
-    -  Alley -> All
-    -  Alley -> Ally
-    -  Alley -> Aly
+    -  Alley -> Al,All,Ally,Aly
     -  Alleyway -> Alwy
     -  Amble -> Ambl
     -  Anex -> Anx
     -  Apartments -> Apts
-    -  Approach -> Apch
-    -  Approach -> App
+    -  Approach -> Apch,App
     -  Arcade -> Arc
     -  Arterial -> Artl
     -  Artery -> Arty
-    -  Avenue -> Av
-    -  Avenue -> Ave
+    -  Avenue -> Av,Ave
     -  Back -> Bk
     -  Banan -> Ba
-    -  Basin -> Basn
-    -  Basin -> Bsn
+    -  Basin -> Basn,Bsn
     -  Bayou -> Byu
     -  Beach -> Bch
     -  Bend -> Bnd
@@ -33,71 +27,51 @@
     -  Bluffs -> Blfs
     -  Boardwalk -> Bwlk
     -  Bottom -> Btm
-    -  Boulevard -> Blvd
-    -  Boulevard -> Bvd
+    -  Boulevard -> Blvd,Bvd
     -  Boundary -> Bdy
     -  Bowl -> Bl
     -  Brace -> Br
     -  Brae -> Br
     -  Branch -> Br
     -  Break -> Brk
-    -  Bridge -> Bdge
-    -  Bridge -> Br
-    -  Bridge -> Brdg
-    -  Bridge -> Brg
-    -  Bridge -> Bri
-    -  Broadway -> Bdwy
-    -  Broadway -> Bway
-    -  Broadway -> Bwy
+    -  Bridge$ -> Bdge,Br,Brdg,Brg,Bri
+    -  Broadway -> Bdwy,Bway,Bwy
     -  Brook -> Brk
     -  Brooks -> Brks
     -  Brow -> Brw
-    -  Buildings -> Bldgs
-    -  Buildings -> Bldngs
+    -  Buildings -> Bldgs,Bldngs
     -  Business -> Bus
     -  Burg -> Bg
     -  Burgs -> Bgs
-    -  Bypass -> Bps
-    -  Bypass -> Byp
-    -  Bypass -> Bypa
+    -  Bypass -> Bps,Byp,Bypa
     -  Byway -> Bywy
     -  Camp -> Cp
     -  Canyon -> Cyn
     -  Cape -> Cpe
     -  Caravan -> Cvn
-    -  Causeway -> Caus
-    -  Causeway -> Cswy
-    -  Causeway -> Cway
-    -  Center -> Cen
-    -  Center -> Ctr
+    -  Causeway -> Caus,Cswy,Cway
+    -  Center,Centre -> Cen,Ctr
     -  Centers -> Ctrs
     -  Central -> Ctrl
-    -  Centre -> Cen
-    -  Centre -> Ctr
     -  Centreway -> Cnwy
     -  Chase -> Ch
     -  Church -> Ch
     -  Circle -> Cir
     -  Circles -> Cirs
-    -  Circuit -> Cct
-    -  Circuit -> Ci
-    -  Circus -> Crc
-    -  Circus -> Crcs
+    -  Circuit -> Cct,Ci
+    -  Circus -> Crc,Crcs
     -  City -> Cty
     -  Cliff -> Clf
     -  Cliffs -> Clfs
     -  Close -> Cl
     -  Club -> Clb
-    -  Common -> Cmn
-    -  Common -> Comm
+    -  Common -> Cmn,Comm
     -  Commons -> Cmns
     -  Community -> Comm
     -  Concourse -> Cnc
     -  Concourse -> Con
     -  Copse -> Cps
-    -  Corner -> Cor
-    -  Corner -> Cnr
-    -  Corner -> Crn
+    -  Corner -> Cor,Cnr,Crn
     -  Corners -> Cors
     -  Corso -> Cso
     -  Cottages -> Cotts
@@ -105,36 +79,24 @@
     -  County Road -> CR
     -  County Route -> CR
     -  Course -> Crse
-    -  Court -> Crt
-    -  Court -> Ct
+    -  Court -> Crt,Ct
     -  Courts -> Cts
     -  Courtyard -> Cyd
     -  Courtyard -> Ctyd
-    -  Cove -> Ce
-    -  Cove -> Cov
-    -  Cove -> Cv
+    -  Cove$ -> Ce,Cov,Cv
     -  Coves -> Cvs
-    -  Creek -> Ck
-    -  Creek -> Cr
-    -  Creek -> Crk
+    -  Creek$ -> Ck,Cr,Crk
     -  Crescent -> Cr
     -  Crescent -> Cres
-    -  Crest -> Crst
-    -  Crest -> Cst
+    -  Crest -> Crst,Cst
     -  Croft -> Cft
-    -  Cross -> Cs
-    -  Cross -> Crss
-    -  Crossing -> Crsg
-    -  Crossing -> Csg
-    -  Crossing -> Xing
-    -  Crossroad -> Crd
-    -  Crossroad -> Xrd
+    -  Cross -> Cs,Crss
+    -  Crossing -> Crsg,Csg,Xing
+    -  Crossroad -> Crd,Xrd
     -  Crossroads -> Xrds
     -  Crossway -> Cowy
-    -  Cul-de-sac -> Cds
-    -  Cul-de-sac -> Csac
-    -  Curve -> Cve
-    -  Curve -> Curv
+    -  Cul-de-sac -> Cds,Csac
+    -  Curve -> Cve,Curv
     -  Cutting -> Cutt
     -  Dale -> Dle
     -  Dam -> Dm
@@ -143,14 +105,10 @@
     -  Divide -> Dv
     -  Down -> Dn
     -  Downs -> Dn
-    -  Drive -> Dr
-    -  Drive -> Drv
-    -  Drive -> Dv
+    -  Drive -> Dr,Drv,Dv
     -  Drives -> Drs
     -  Drive-In => Drive-In # prevent abbreviation here
-    -  Driveway -> Drwy
-    -  Driveway -> Dvwy
-    -  Driveway -> Dwy
+    -  Driveway -> Drwy,Dvwy,Dwy
     -  East -> E
     -  Edge -> Edg
     -  Elbow -> Elb
@@ -158,25 +116,18 @@
     -  Esplanade -> Esp
     -  Estate -> Est
     -  Estates -> Ests
-    -  Expressway -> Exp
-    -  Expressway -> Expy
-    -  Expressway -> Expwy
-    -  Expressway -> Xway
+    -  Expressway -> Exp,Expy,Expwy,Xway
     -  Extension -> Ex
     -  Extensions -> Exts
-    -  Fairway -> Fawy
-    -  Fairway -> Fy
+    -  Fairway -> Fawy,Fy
     -  Falls -> Fls
     -  Father -> Fr
-    -  Ferry -> Fy
-    -  Ferry -> Fry
-    -  Field -> Fd
-    -  Field -> Fld
+    -  Ferry -> Fy,Fry
+    -  Field -> Fd,Fld
     -  Fields -> Flds
     -  Fire Track -> Ftrk
     -  Firetrail -> Fit
-    -  Flat -> Fl
-    -  Flat -> Flt
+    -  Flat -> Fl,Flt
     -  Flats -> Flts
     -  Follow -> Folw
     -  Footway -> Ftwy
@@ -191,67 +142,47 @@
     -  Fork -> Frk
     -  Forks -> Frks
     -  Fort -> Ft
-    -  Freeway -> Frwy
-    -  Freeway -> Fwy
+    -  Freeway -> Frwy,Fwy
     -  Front -> Frnt
-    -  Frontage -> Fr
-    -  Frontage -> Frtg
+    -  Frontage -> Fr,Frtg
     -  Garden -> Gdn
-    -  Gardens -> Gdn
-    -  Gardens -> Gdns
-    -  Gate -> Ga
-    -  Gate -> Gte
-    -  Gates -> Ga
-    -  Gates -> Gte
-    -  Gateway -> Gwy
-    -  Gateway -> Gtwy
+    -  Gardens -> Gdn,Gdns
+    -  Gate,Gates -> Ga,Gte
+    -  Gateway -> Gwy,Gtwy
     -  George -> Geo
-    -  Glade -> Gl
-    -  Glade -> Gld
-    -  Glade -> Glde
+    -  Glade$ -> Gl,Gld,Glde
     -  Glen -> Gln
     -  Glens -> Glns
     -  Grange -> Gra
-    -  Green -> Gn
-    -  Green -> Grn
+    -  Green -> Gn,Grn
     -  Greens -> Grns
     -  Ground -> Grnd
-    -  Grove -> Gr
-    -  Grove -> Gro
-    -  Grove -> Grv
+    -  Grove$ -> Gr,Gro,Grv
     -  Groves -> Grvs
     -  Grovet -> Gr
     -  Gully -> Gly
-    -  Harbor -> Hbr
+    -  Harbor -> Hbr,Harbour
     -  Harbors -> Hbrs
-    -  Harbour -> Hbr
+    -  Harbour -> Hbr,Harbor
     -  Haven -> Hvn
     -  Head -> Hd
     -  Heads -> Hd
-    -  Heights -> Hgts
-    -  Heights -> Ht
-    -  Heights -> Hts
+    -  Heights -> Hgts,Ht,Hts
     -  High School -> HS
-    -  Highroad -> Hird
-    -  Highroad -> Hrd
+    -  Highroad -> Hird,Hrd
     -  Highway -> Hwy
     -  Hill -> Hl
-    -  Hills -> Hl
-    -  Hills -> Hls
+    -  Hills -> Hl,Hls
     -  Hollow -> Holw
     -  Hospital -> Hosp
-    -  House -> Ho
-    -  House -> Hse
+    -  House -> Ho,Hse
     -  Industrial -> Ind
     -  Inlet -> Inlt
     -  Interchange -> Intg
     -  International -> Intl
-    -  Island -> I
-    -  Island -> Is
+    -  Island -> I,Is
     -  Islands -> Iss
-    -  Junction -> Jct
-    -  Junction -> Jctn
-    -  Junction -> Jnc
+    -  Junction -> Jct,Jctn,Jnc
     -  Junctions -> Jcts
     -  Junior -> Jr
     -  Key -> Ky
@@ -260,40 +191,31 @@
     -  Knolls -> Knls
     -  Lagoon -> Lgn
     -  Lake -> Lk
-    -  Lakes -> L
-    -  Lakes -> Lks
-    -  Landing -> Ldg
-    -  Landing -> Lndg
-    -  Lane -> La
-    -  Lane -> Ln
+    -  Lakes -> L,Lks
+    -  Landing -> Ldg,Lndg
+    -  Lane -> La,Ln
     -  Laneway -> Lnwy
     -  Light -> Lgt
     -  Lights -> Lgts
     -  Line -> Ln
     -  Link -> Lk
-    -  Little -> Lit
-    -  Little -> Lt
+    -  Little -> Lit,Lt
     -  Loaf -> Lf
     -  Lock -> Lck
     -  Locks -> Lcks
     -  Lodge -> Ldg
     -  Lookout -> Lkt
     -  Loop -> Lp
-    -  Lower -> Low
-    -  Lower -> Lr
-    -  Lower -> Lwr
+    -  Lower -> Low,Lr,Lwr
     -  Mall -> Ml
     -  Manor -> Mnr
     -  Manors -> Mnrs
     -  Mansions -> Mans
     -  Market -> Mkt
     -  Meadow -> Mdw
-    -  Meadows -> Mdw
-    -  Meadows -> Mdws
+    -  Meadows -> Mdw,Mdws
     -  Mead -> Md
-    -  Meander -> Mdr
-    -  Meander -> Mndr
-    -  Meander -> Mr
+    -  Meander -> Mdr,Mndr,Mr
     -  Medical -> Med
     -  Memorial -> Mem
     -  Mews -> Mw
@@ -304,12 +226,10 @@
     -  Mill -> Ml
     -  Mills -> Mls
     -  Mission -> Msn
-    -  Motorway -> Mtwy
-    -  Motorway -> Mwy
+    -  Motorway -> Mtwy,Mwy
     -  Mount -> Mt
     -  Mountain -> Mtn
-    -  Mountains -> Mtn
-    -  Mountains -> Mtns
+    -  Mountains$ -> Mtn,Mtns
     -  Municipal -> Mun
     -  Museum -> Mus
     -  National Park -> NP
@@ -321,50 +241,37 @@
     -  Northeast -> NE
     -  Northwest -> NW
     -  Orchard -> Orch
-    -  Outlook -> Out
-    -  Outlook -> Otlk
+    -  Outlook -> Out,Otlk
     -  Overpass -> Opas
     -  Parade -> Pde
     -  Paradise -> Pdse
     -  Park -> Pk
     -  Parklands -> Pkld
-    -  Parkway -> Pkwy
-    -  Parkway -> Pky
-    -  Parkway -> Pwy
+    -  Parkway -> Pkwy,Pky,Pwy
     -  Parkways -> Pkwy
     -  Pass -> Ps
     -  Passage -> Psge
-    -  Pathway -> Phwy
-    -  Pathway -> Pway
-    -  Pathway -> Pwy
+    -  Pathway -> Phwy,Pway,Pwy
     -  Piazza -> Piaz
     -  Pike -> Pk
     -  Pine -> Pne
     -  Pines -> Pnes
     -  Place -> Pl
-    -  Plain -> Pl
-    -  Plain -> Pln
-    -  Plains -> Pl
-    -  Plains -> Plns
+    -  Plain -> Pl,Pln
+    -  Plains -> Pl,Plns
     -  Plateau -> Plat
-    -  Plaza -> Pl
-    -  Plaza -> Plz
-    -  Plaza -> Plza
+    -  Plaza -> Pl,Plz,Plza
     -  Pocket -> Pkt
-    -  Point -> Pnt
-    -  Point -> Pt
+    -  Point -> Pnt,Pt
     -  Points -> Pts
-    -  Port -> Prt
-    -  Port -> Pt
+    -  Port -> Prt,Pt
     -  Ports -> Prts
     -  Post Office -> PO
     -  Prairie -> Pr
     -  Precinct -> Pct
-    -  Promenade -> Prm
-    -  Promenade -> Prom
+    -  Promenade -> Prm,Prom
     -  Quadrangle -> Qdgl
-    -  Quadrant -> Qdrt
-    -  Quadrant -> Qd
+    -  Quadrant -> Qdrt,Qd
     -  Quay -> Qy
     -  Quays -> Qy
     -  Quays -> Qys
@@ -372,8 +279,7 @@
     -  Ramble -> Ra
     -  Ramble -> Rmbl
     -  Ranch -> Rnch
-    -  Range -> Rge
-    -  Range -> Rnge
+    -  Range -> Rge,Rnge
     -  Rapid -> Rpd
     -  Rapids -> Rpds
     -  Reach -> Rch
@@ -381,37 +287,31 @@
     -  Reserve -> Res
     -  Reservoir -> Res
     -  Rest -> Rst
-    -  Retreat -> Rt
-    -  Retreat -> Rtt
+    -  Retreat -> Rt,Rtt
     -  Return -> Rtn
-    -  Ridge -> Rdg
-    -  Ridge -> Rdge
+    -  Ridge -> Rdg,Rdge
     -  Ridges -> Rdgs
     -  Ridgeway -> Rgwy
     -  Right of Way -> Rowy
     -  Rise -> Ri
-    -  River -> R
-    -  River -> Riv
-    -  River -> Rvr
+    -  ^River -> R,Riv,Rvr
+    -  River$ -> R,Riv,Rvr
     -  Riverway -> Rvwy
     -  Riviera -> Rvra
     -  Road -> Rd
     -  Roads -> Rds
     -  Roadside -> Rdsd
-    -  Roadway -> Rdwy
-    -  Roadway -> Rdy
+    -  Roadway -> Rdwy,Rdy
     -  Rocks -> Rks
     -  Ronde -> Rnde
     -  Rosebowl -> Rsbl
     -  Rotary -> Rty
     -  Round -> Rnd
-    -  Route -> Rt
-    -  Route -> Rte
+    -  Route -> Rt,Rte
     -  Saint -> St
     -  Saints -> SS
     -  Senior -> Sr
-    -  Serviceway -> Swy
-    -  Serviceway -> Svwy
+    -  Serviceway -> Swy,Svwy
     -  Shoal -> Shl
     -  Shore -> Shr
     -  Shores -> Shrs
@@ -421,8 +321,7 @@
     -  Skyway -> Skwy
     -  Slope -> Slpe
     -  Sound -> Snd
-    -  South -> S
-    -  South -> Sth
+    -  South -> S,Sth
     -  Southeast -> SE
     -  Southwest -> SW
     -  Spring -> Spg
@@ -431,13 +330,10 @@
     -  Square -> Sq
     -  Squares -> Sqs
     -  Stairway -> Strwy
-    -  State Highway -> SH
-    -  State Highway -> SHwy
+    -  State Highway -> SH,SHwy
     -  State Route -> SR
-    -  Station -> Sta
-    -  Station -> Stn
-    -  Strand -> Sd
-    -  Strand -> Stra
+    -  Station -> Sta,Stn
+    -  Strand -> Sd,Stra
     -  Stravenue -> Stra
     -  Stream -> Strm
     -  Street -> St
@@ -447,61 +343,43 @@
     -  Summit -> Smt
     -  Tarn -> Tn
     -  Terminal -> Term
-    -  Terrace -> Tce
-    -  Terrace -> Ter
-    -  Terrace -> Terr
-    -  Thoroughfare -> Thfr
-    -  Thoroughfare -> Thor
+    -  Terrace -> Tce,Ter,Terr
+    -  Thoroughfare -> Thfr,Thor
     -  Throughway -> Trwy
-    -  Tollway -> Tlwy
-    -  Tollway -> Twy
+    -  Tollway -> Tlwy,Twy
     -  Towers -> Twrs
     -  Township -> Twp
     -  Trace -> Trce
-    -  Track -> Tr
-    -  Track -> Trak
-    -  Track -> Trk
+    -  Track -> Tr,Trak,Trk
     -  Trafficway -> Trfy
     -  Trail -> Trl
     -  Trailer -> Trlr
     -  Triangle -> Tri
     -  Trunkway -> Tkwy
-    -  Tunnel -> Tun
-    -  Tunnel -> Tunl
-    -  Turn -> Tn
-    -  Turn -> Trn
-    -  Turnpike -> Tpk
-    -  Turnpike -> Tpke
-    -  Underpass -> Upas
-    -  Underpass -> Ups
+    -  Tunnel -> Tun,Tunl
+    -  Turn -> Tn,Trn
+    -  Turnpike -> Tpk,Tpke
+    -  Underpass -> Upas,Ups
     -  Union -> Un
     -  Unions -> Uns
-    -  University -> Uni
-    -  University -> Univ
+    -  University -> Uni,Univ
     -  Upper -> Up
     -  Upper -> Upr
     -  Vale -> Va
     -  Valley -> Vly
     -  Valley -> Vy
     -  Valleys -> Vlys
-    -  Viaduct -> Vdct
-    -  Viaduct -> Via
-    -  Viaduct -> Viad
+    -  Viaduct$ -> Vdct,Via,Viad
     -  View -> Vw
     -  Views -> Vws
-    -  Village -> Vill
-    -  Village -> Vlg
+    -  Village -> Vill,Vlg
     -  Villages -> Vlgs
     -  Villas -> Vlls
     -  Ville -> Vl
-    -  Vista -> Vis
-    -  Vista -> Vst
-    -  Vista -> Vsta
-    -  Walk -> Wk
-    -  Walk -> Wlk
+    -  Vista -> Vis,Vst,Vsta
+    -  Walk -> Wk,Wlk
     -  Walks -> Walk
-    -  Walkway -> Wkwy
-    -  Walkway -> Wky
+    -  Walkway -> Wkwy,Wky
     -  Waters -> Wtr
     -  Way -> Wy
     -  Well -> Wl
diff --git a/src/nominatim_api/localization.py b/src/nominatim_api/localization.py
index bbf9225b..3414286e 100644
--- a/src/nominatim_api/localization.py
+++ b/src/nominatim_api/localization.py
@@ -8,6 +8,7 @@
 Helper functions for localizing names of results.
 """
 from typing import Mapping, List, Optional
+from .config import Configuration
 
 import re
 
@@ -20,14 +21,18 @@ class Locales:
     """
 
     def __init__(self, langs: Optional[List[str]] = None):
+        self.config = Configuration(None)
         self.languages = langs or []
         self.name_tags: List[str] = []
 
-        # Build the list of supported tags. It is currently hard-coded.
-        self._add_lang_tags('name')
-        self._add_tags('name', 'brand')
-        self._add_lang_tags('official_name', 'short_name')
-        self._add_tags('official_name', 'short_name', 'ref')
+        parts = self.config.OUTPUT_NAMES.split(',')
+
+        for part in parts:
+            part = part.strip()
+            if part.endswith(":XX"):
+                self._add_lang_tags(part[:-3])
+            else:
+                self._add_tags(part)
 
     def __bool__(self) -> bool:
         return len(self.languages) > 0
diff --git a/src/nominatim_api/query_preprocessing/regex_replace.py b/src/nominatim_api/query_preprocessing/regex_replace.py
new file mode 100644
index 00000000..b3a02495
--- /dev/null
+++ b/src/nominatim_api/query_preprocessing/regex_replace.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2025 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+This preprocessor replaces values in a given input based on pre-defined regex rules.
+
+Arguments:
+    pattern: Regex pattern to be applied on the input
+    replace: The string that it is to be replaced with
+"""
+from typing import List
+import re
+
+from .config import QueryConfig
+from .base import QueryProcessingFunc
+from ..search.query import Phrase
+
+
+class _GenericPreprocessing:
+    """Perform replacements to input phrases using custom regex patterns."""
+
+    def __init__(self, config: QueryConfig) -> None:
+        """Initialise the _GenericPreprocessing class with patterns from the ICU config file."""
+        self.config = config
+
+        match_patterns = self.config.get('replacements', 'Key not found')
+        self.compiled_patterns = [
+            (re.compile(item['pattern']), item['replace']) for item in match_patterns
+            ]
+
+    def split_phrase(self, phrase: Phrase) -> Phrase:
+        """This function performs replacements on the given text using regex patterns."""
+        for item in self.compiled_patterns:
+            phrase.text = item[0].sub(item[1], phrase.text)
+
+        return phrase
+
+    def __call__(self, phrases: List[Phrase]) -> List[Phrase]:
+        """
+        Return the final Phrase list.
+        Returns an empty list if there is nothing left after split_phrase.
+        """
+        result = [p for p in map(self.split_phrase, phrases) if p.text.strip()]
+        return result
+
+
+def create(config: QueryConfig) -> QueryProcessingFunc:
+    """ Create a function for generic preprocessing."""
+    return _GenericPreprocessing(config)
diff --git a/src/nominatim_api/search/db_search_builder.py b/src/nominatim_api/search/db_search_builder.py
index c63803d2..0292335e 100644
--- a/src/nominatim_api/search/db_search_builder.py
+++ b/src/nominatim_api/search/db_search_builder.py
@@ -208,7 +208,7 @@ class SearchBuilder:
         addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
         addr_tokens = list({t.token for t in addr_partials})
 
-        exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))
+        exp_count = min(t.count for t in name_partials.values()) / (3**(len(name_partials) - 1))
 
         if (len(name_partials) > 3 or exp_count < 8000):
             yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
@@ -264,8 +264,6 @@ class SearchBuilder:
             address lookups will use the index, when the occurrences are not
             too many.
         """
-        # At this point drop unindexed partials from the address.
-        # This might yield wrong results, nothing we can do about that.
         if use_lookup:
             addr_restrict_tokens = []
             addr_lookup_tokens = [t.token for t in addr_partials]
diff --git a/src/nominatim_api/sql/sqlalchemy_functions.py b/src/nominatim_api/sql/sqlalchemy_functions.py
index 81fc83d6..00830f33 100644
--- a/src/nominatim_api/sql/sqlalchemy_functions.py
+++ b/src/nominatim_api/sql/sqlalchemy_functions.py
@@ -122,15 +122,18 @@ class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
 
     def __init__(self, table: sa.Table) -> None:
         super().__init__(table.c.rank_address,
-                         table.c.housenumber, table.c.name)
+                         table.c.housenumber, table.c.name, table.c.address)
 
 
 @compiles(IsAddressPoint)
 def default_is_address_point(element: IsAddressPoint,
                              compiler: 'sa.Compiled', **kw: Any) -> str:
-    rank, hnr, name = list(element.clauses)
-    return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
+    rank, hnr, name, address = list(element.clauses)
+    return "(%s = 30 AND (%s IS NULL OR NOT %s ? '_inherited')" \
+           " AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
                 compiler.process(rank, **kw),
+                compiler.process(address, **kw),
+                compiler.process(address, **kw),
                 compiler.process(hnr, **kw),
                 compiler.process(name, **kw))
 
@@ -138,9 +141,11 @@ def default_is_address_point(element: IsAddressPoint,
 @compiles(IsAddressPoint, 'sqlite')
 def sqlite_is_address_point(element: IsAddressPoint,
                             compiler: 'sa.Compiled', **kw: Any) -> str:
-    rank, hnr, name = list(element.clauses)
-    return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
+    rank, hnr, name, address = list(element.clauses)
+    return "(%s = 30 AND json_extract(%s, '$._inherited') IS NULL" \
+           " AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
                 compiler.process(rank, **kw),
+                compiler.process(address, **kw),
                 compiler.process(hnr, **kw),
                 compiler.process(name, **kw))
 
diff --git a/src/nominatim_api/sql/sqlalchemy_types/geometry.py b/src/nominatim_api/sql/sqlalchemy_types/geometry.py
index 90adcce8..583568c4 100644
--- a/src/nominatim_api/sql/sqlalchemy_types/geometry.py
+++ b/src/nominatim_api/sql/sqlalchemy_types/geometry.py
@@ -173,7 +173,7 @@ class Geometry(types.UserDefinedType):  # type: ignore[type-arg]
     def __init__(self, subtype: str = 'Geometry'):
         self.subtype = subtype
 
-    def get_col_spec(self) -> str:
+    def get_col_spec(self, **_: Any) -> str:
         return f'GEOMETRY({self.subtype}, 4326)'
 
     def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
diff --git a/src/nominatim_api/version.py b/src/nominatim_api/version.py
index fc401248..3c98435d 100644
--- a/src/nominatim_api/version.py
+++ b/src/nominatim_api/version.py
@@ -8,4 +8,4 @@
 Version information for the Nominatim API.
 """
 
-NOMINATIM_API_VERSION = '5.0.0'
+NOMINATIM_API_VERSION = '5.1.0'
diff --git a/src/nominatim_db/tokenizer/icu_tokenizer.py b/src/nominatim_db/tokenizer/icu_tokenizer.py
index 858cb64c..19b83863 100644
--- a/src/nominatim_db/tokenizer/icu_tokenizer.py
+++ b/src/nominatim_db/tokenizer/icu_tokenizer.py
@@ -121,10 +121,10 @@ class ICUTokenizer(AbstractTokenizer):
                            SELECT unnest(nameaddress_vector) as id, count(*)
                                  FROM search_name GROUP BY id)
                   SELECT coalesce(a.id, w.id) as id,
-                         (CASE WHEN w.count is null THEN '{}'::JSONB
+                         (CASE WHEN w.count is null or w.count <= 1 THEN '{}'::JSONB
                               ELSE jsonb_build_object('count', w.count) END
                           ||
-                          CASE WHEN a.count is null THEN '{}'::JSONB
+                          CASE WHEN a.count is null or a.count <= 1 THEN '{}'::JSONB
                               ELSE jsonb_build_object('addr_count', a.count) END) as info
                   FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
                   """)
@@ -134,9 +134,10 @@ class ICUTokenizer(AbstractTokenizer):
                 drop_tables(conn, 'tmp_word')
                 cur.execute("""CREATE TABLE tmp_word AS
                                 SELECT word_id, word_token, type, word,
-                                       (CASE WHEN wf.info is null THEN word.info
-                                        ELSE coalesce(word.info, '{}'::jsonb) || wf.info
-                                        END) as info
+                                       coalesce(word.info, '{}'::jsonb)
+                                       - 'count' - 'addr_count' ||
+                                       coalesce(wf.info, '{}'::jsonb)
+                                       as info
                                 FROM word LEFT JOIN word_frequencies wf
                                      ON word.word_id = wf.id
                                 ORDER BY word_id
@@ -585,10 +586,14 @@ class ICUNameAnalyzer(AbstractAnalyzer):
             if word_id:
                 result = self._cache.housenumbers.get(word_id, result)
                 if result[0] is None:
-                    variants = analyzer.compute_variants(word_id)
+                    varout = analyzer.compute_variants(word_id)
+                    if isinstance(varout, tuple):
+                        variants = varout[0]
+                    else:
+                        variants = varout
                     if variants:
                         hid = execute_scalar(self.conn, "SELECT create_analyzed_hnr_id(%s, %s)",
-                                             (word_id, list(variants)))
+                                             (word_id, variants))
                         result = hid, variants[0]
                         self._cache.housenumbers[word_id] = result
 
@@ -633,13 +638,17 @@ class ICUNameAnalyzer(AbstractAnalyzer):
 
             full, part = self._cache.names.get(token_id, (None, None))
             if full is None:
-                variants = analyzer.compute_variants(word_id)
+                varset = analyzer.compute_variants(word_id)
+                if isinstance(varset, tuple):
+                    variants, lookups = varset
+                else:
+                    variants, lookups = varset, None
                 if not variants:
                     continue
 
                 with self.conn.cursor() as cur:
-                    cur.execute("SELECT * FROM getorcreate_full_word(%s, %s)",
-                                (token_id, variants))
+                    cur.execute("SELECT * FROM getorcreate_full_word(%s, %s, %s)",
+                                (token_id, variants, lookups))
                     full, part = cast(Tuple[int, List[int]], cur.fetchone())
 
                 self._cache.names[token_id] = (full, part)
diff --git a/src/nominatim_db/tokenizer/token_analysis/base.py b/src/nominatim_db/tokenizer/token_analysis/base.py
index 52ee8013..186f1d3e 100644
--- a/src/nominatim_db/tokenizer/token_analysis/base.py
+++ b/src/nominatim_db/tokenizer/token_analysis/base.py
@@ -7,7 +7,7 @@
 """
 Common data types and protocols for analysers.
 """
-from typing import Mapping, List, Any
+from typing import Mapping, List, Any, Union, Tuple
 
 from ...typing import Protocol
 from ...data.place_name import PlaceName
@@ -33,7 +33,7 @@ class Analyzer(Protocol):
                     for example because the character set in use does not match.
         """
 
-    def compute_variants(self, canonical_id: str) -> List[str]:
+    def compute_variants(self, canonical_id: str) -> Union[List[str], Tuple[List[str], List[str]]]:
         """ Compute the transliterated spelling variants for the given
             canonical ID.
 
diff --git a/src/nominatim_db/tokenizer/token_analysis/generic.py b/src/nominatim_db/tokenizer/token_analysis/generic.py
index fa9dc4df..b01cebf7 100644
--- a/src/nominatim_db/tokenizer/token_analysis/generic.py
+++ b/src/nominatim_db/tokenizer/token_analysis/generic.py
@@ -7,7 +7,7 @@
 """
 Generic processor for names that creates abbreviation variants.
 """
-from typing import Mapping, Dict, Any, Iterable, Iterator, Optional, List, cast
+from typing import Mapping, Dict, Any, Iterable, Optional, List, cast, Tuple
 import itertools
 
 from ...errors import UsageError
@@ -78,7 +78,7 @@ class GenericTokenAnalysis:
         """
         return cast(str, self.norm.transliterate(name.name)).strip()
 
-    def compute_variants(self, norm_name: str) -> List[str]:
+    def compute_variants(self, norm_name: str) -> Tuple[List[str], List[str]]:
         """ Compute the spelling variants for the given normalized name
             and transliterate the result.
         """
@@ -87,18 +87,20 @@ class GenericTokenAnalysis:
         for mutation in self.mutations:
             variants = mutation.generate(variants)
 
-        return [name for name in self._transliterate_unique_list(norm_name, variants) if name]
-
-    def _transliterate_unique_list(self, norm_name: str,
-                                   iterable: Iterable[str]) -> Iterator[Optional[str]]:
-        seen = set()
+        varset = set(map(str.strip, variants))
         if self.variant_only:
-            seen.add(norm_name)
+            varset.discard(norm_name)
+
+        trans = []
+        norm = []
+
+        for var in varset:
+            t = self.to_ascii.transliterate(var).strip()
+            if t:
+                trans.append(t)
+                norm.append(var)
 
-        for variant in map(str.strip, iterable):
-            if variant not in seen:
-                seen.add(variant)
-                yield self.to_ascii.transliterate(variant).strip()
+        return trans, norm
 
     def _generate_word_variants(self, norm_name: str) -> Iterable[str]:
         baseform = '^ ' + norm_name + ' ^'
diff --git a/src/nominatim_db/version.py b/src/nominatim_db/version.py
index 26856498..070417e3 100644
--- a/src/nominatim_db/version.py
+++ b/src/nominatim_db/version.py
@@ -55,7 +55,7 @@ def parse_version(version: str) -> NominatimVersion:
     return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])
 
 
-NOMINATIM_VERSION = parse_version('5.0.0-0')
+NOMINATIM_VERSION = parse_version('5.1.0-0')
 
 POSTGRESQL_REQUIRED_VERSION = (12, 0)
 POSTGIS_REQUIRED_VERSION = (3, 0)
diff --git a/test/python/api/query_processing/test_regex_replace.py b/test/python/api/query_processing/test_regex_replace.py
new file mode 100644
index 00000000..ef759ba1
--- /dev/null
+++ b/test/python/api/query_processing/test_regex_replace.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2025 by the Nominatim developer community.
+# For a full list of authors see the git log.
+'''
+Tests for replacing values in an input using custom regex.
+'''
+import pytest
+
+import nominatim_api.search.query as qmod
+from nominatim_api.query_preprocessing.config import QueryConfig
+from nominatim_api.query_preprocessing import regex_replace
+
+
+def run_preprocessor_on(query):
+    config = QueryConfig()
+    config.set_normalizer(None)
+
+    config['replacements'] = [
+        {'pattern': r'\b(?:\d{1,3}\.){3}\d{1,3}\b', 'replace': ''},  # IPv4
+        {'pattern': r'https?://\S+', 'replace': ''}  # HTTP/HTTPS URLs
+    ]
+
+    proc = regex_replace.create(config)
+    return proc(query)
+
+
+@pytest.mark.parametrize('inp,outp', [
+    (['45.67.89.101'], []),
+    (['198.51.100.23'], []),
+    (['203.0.113.255'], []),
+    (['http://www.openstreetmap.org'], []),
+    (['https://www.openstreetmap.org/edit'], []),
+    (['http://osm.org'], []),
+    (['https://www.openstreetmap.org/user/abc'], []),
+    (['https://tile.openstreetmap.org/12/2048/2048.png'], []),
+    (['Check the map at https://www.openstreetmap.org'], ['Check the map at ']),
+    (['Use 203.0.113.255 for routing'], ['Use  for routing']),
+    (['Find maps at https://osm.org and http://openstreetmap.org'], ['Find maps at  and ']),
+    (['203.0.113.255', 'Some Address'], ['Some Address']),
+    (['https://osm.org', 'Another Place'], ['Another Place']),
+])
+def test_split_phrases(inp, outp):
+    query = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in inp]
+
+    out = run_preprocessor_on(query)
+    assert out == [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp]
diff --git a/test/python/api/test_api_reverse.py b/test/python/api/test_api_reverse.py
index 91074ecb..d7d3ba7e 100644
--- a/test/python/api/test_api_reverse.py
+++ b/test/python/api/test_api_reverse.py
@@ -68,7 +68,8 @@ def test_reverse_ignore_unindexed(apiobj, frontend):
                           (0.7, napi.DataLayer.NATURAL, 227),
                           (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.RAILWAY, 225),
                           (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225),
-                          (5, napi.DataLayer.ADDRESS, 229)])
+                          (5, napi.DataLayer.ADDRESS, 229),
+                          (5.0001, napi.DataLayer.ADDRESS, 229)])
 def test_reverse_rank_30_layers(apiobj, frontend, y, layer, place_id):
     apiobj.add_placex(place_id=223, osm_type='N', class_='place', type='house',
                       housenumber='1',
@@ -96,6 +97,12 @@ def test_reverse_rank_30_layers(apiobj, frontend, y, layer, place_id):
                       rank_address=30,
                       rank_search=30,
                       centroid=(1.3, 5))
+    apiobj.add_placex(place_id=230, class_='place', type='house',
+                      housenumber='2',
+                      address={'_inherited': ''},
+                      rank_address=30,
+                      rank_search=30,
+                      centroid=(1.3, 5.0001))
 
     api = frontend(apiobj, options=API_OPTIONS)
     assert api.reverse((1.3, y), layers=layer).place_id == place_id
diff --git a/test/python/api/test_localization.py b/test/python/api/test_localization.py
index 0a30cdc1..c3e02596 100644
--- a/test/python/api/test_localization.py
+++ b/test/python/api/test_localization.py
@@ -27,6 +27,62 @@ def test_display_name_none_localized():
     assert loc.display_name({'ref': '34', 'name:de': 'DE'}) == '34'
 
 
+def test_output_names_none_localized():
+    loc = Locales()
+
+    expected_tags = [
+        'name', '_place_name', 'brand', '_place_brand', 'official_name', '_place_official_name',
+        'short_name', '_place_short_name', 'ref', '_place_ref'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_none_localized_and_custom_output_names(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name:XX,entrance:XX,name,brand,test_tag,'
+        'official_name:XX,short_name:XX,alt_name:XX'
+    )
+    loc = Locales()
+
+    expected_tags = [
+        'name', '_place_name', 'brand', '_place_brand', 'test_tag', '_place_test_tag'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_none_localized_and_custom_output_names_more_than_two_changes(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name:XX,brand,test_tag:XX,official_name,short_name:XX,'
+        'alt_name,another_tag_with:XX,another_tag_withoutXX'
+    )
+    loc = Locales()
+
+    expected_tags = [
+        'brand', '_place_brand', 'official_name', '_place_official_name', 'alt_name',
+        '_place_alt_name', 'another_tag_withoutXX', '_place_another_tag_withoutXX'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_none_localized_and_custom_output_names_including_space(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name:XX,name ,short_name:XX, short_name'
+    )
+    loc = Locales()
+
+    expected_tags = [
+        'name', '_place_name', 'short_name', '_place_short_name'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
 def test_display_name_localized():
     loc = Locales(['en', 'de'])
 
@@ -35,6 +91,146 @@ def test_display_name_localized():
     assert loc.display_name({'ref': '34', 'name:de': 'DE'}) == 'DE'
 
 
+def test_output_names_localized():
+    loc = Locales(['en', 'es'])
+
+    expected_tags = [
+        'name:en', '_place_name:en', 'name:es', '_place_name:es', 'name', '_place_name', 'brand',
+        '_place_brand', 'official_name:en', '_place_official_name:en', 'official_name:es',
+        '_place_official_name:es', 'short_name:en', '_place_short_name:en', 'short_name:es',
+        '_place_short_name:es', 'official_name', '_place_official_name', 'short_name',
+        '_place_short_name', 'ref', '_place_ref'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_localized_and_custom_output_names_including_space(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name:XX,name ,short_name:XX, short_name'
+    )
+    loc = Locales(['en', 'es'])
+
+    expected_tags = [
+        'name:en', '_place_name:en', 'name:es', '_place_name:es',
+        'name', '_place_name',
+        'short_name:en', '_place_short_name:en', 'short_name:es', '_place_short_name:es',
+        'short_name', '_place_short_name'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_localized_and_custom_output_names(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name:XX,entrance:XX,name,brand,test_tag,official_name:XX,short_name:XX,alt_name:XX'
+    )
+    loc = Locales(['en', 'es'])
+
+    expected_tags = [
+        'name:en', '_place_name:en', 'name:es', '_place_name:es', 'entrance:en',
+        '_place_entrance:en', 'entrance:es', '_place_entrance:es', 'name', '_place_name',
+        'brand', '_place_brand', 'test_tag', '_place_test_tag', 'official_name:en',
+        '_place_official_name:en', 'official_name:es', '_place_official_name:es',
+        'short_name:en', '_place_short_name:en', 'short_name:es', '_place_short_name:es',
+        'alt_name:en', '_place_alt_name:en', 'alt_name:es', '_place_alt_name:es'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_localized_and_custom_output_names_start_with_tag_that_has_no_XX(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name,brand,test_tag,official_name:XX,short_name:XX,alt_name:XX'
+    )
+    loc = Locales(['en', 'es'])
+
+    expected_tags = [
+        'name', '_place_name', 'brand', '_place_brand', 'test_tag', '_place_test_tag',
+        'official_name:en', '_place_official_name:en', 'official_name:es',
+        '_place_official_name:es', 'short_name:en', '_place_short_name:en', 'short_name:es',
+        '_place_short_name:es', 'alt_name:en', '_place_alt_name:en', 'alt_name:es',
+        '_place_alt_name:es'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_localized_and_custom_output_names_no_named_tags(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name,brand,test_tag'
+    )
+    loc = Locales(['en', 'es'])
+
+    expected_tags = [
+        'name', '_place_name', 'brand', '_place_brand', 'test_tag', '_place_test_tag'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_localized_and_custom_output_names_only_named_tags(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name:XX,entrance:XX,official_name:XX,short_name:XX,alt_name:XX'
+    )
+    loc = Locales(['en', 'es'])
+
+    expected_tags = [
+        'name:en', '_place_name:en', 'name:es', '_place_name:es', 'entrance:en',
+        '_place_entrance:en', 'entrance:es', '_place_entrance:es', 'official_name:en',
+        '_place_official_name:en', 'official_name:es', '_place_official_name:es',
+        'short_name:en', '_place_short_name:en', 'short_name:es', '_place_short_name:es',
+        'alt_name:en', '_place_alt_name:en', 'alt_name:es', '_place_alt_name:es'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_localized_and_custom_output_names_more_than_two_changes(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name:XX,brand,test_tag:XX,official_name,short_name:XX,'
+        'alt_name,another_tag_with:XX,another_tag_withoutXX'
+    )
+    loc = Locales(['en', 'es'])
+
+    expected_tags = [
+        'name:en', '_place_name:en', 'name:es', '_place_name:es', 'brand', '_place_brand',
+        'test_tag:en', '_place_test_tag:en', 'test_tag:es', '_place_test_tag:es', 'official_name',
+        '_place_official_name', 'short_name:en', '_place_short_name:en', 'short_name:es',
+        '_place_short_name:es', 'alt_name', '_place_alt_name', 'another_tag_with:en',
+        '_place_another_tag_with:en', 'another_tag_with:es', '_place_another_tag_with:es',
+        'another_tag_withoutXX', '_place_another_tag_withoutXX'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
+def test_output_names_localized_and_custom_output_names_XX_in_the_middle(monkeypatch):
+    monkeypatch.setenv(
+        'NOMINATIM_OUTPUT_NAMES',
+        'name:XX,br:XXand,test_tag:XX,official_name,sh:XXort_name:XX,'
+        'alt_name,another_tag_with:XX,another_tag_withoutXX'
+    )
+    loc = Locales(['en', 'es'])
+
+    expected_tags = [
+        'name:en', '_place_name:en', 'name:es', '_place_name:es', 'br:XXand', '_place_br:XXand',
+        'test_tag:en', '_place_test_tag:en', 'test_tag:es', '_place_test_tag:es', 'official_name',
+        '_place_official_name', 'sh:XXort_name:en', '_place_sh:XXort_name:en', 'sh:XXort_name:es',
+        '_place_sh:XXort_name:es', 'alt_name', '_place_alt_name', 'another_tag_with:en',
+        '_place_another_tag_with:en', 'another_tag_with:es', '_place_another_tag_with:es',
+        'another_tag_withoutXX', '_place_another_tag_withoutXX'
+    ]
+
+    assert loc.name_tags == expected_tags, f'Expected {expected_tags}, but got {loc.name_tags}'
+
+
 def test_display_name_preference():
     loc = Locales(['en', 'de'])
 
diff --git a/test/python/tokenizer/test_icu.py b/test/python/tokenizer/test_icu.py
index ce00281c..12cef894 100644
--- a/test/python/tokenizer/test_icu.py
+++ b/test/python/tokenizer/test_icu.py
@@ -230,19 +230,20 @@ def test_update_statistics(word_table, table_factory, temp_db_cursor,
                            tokenizer_factory, test_config):
     word_table.add_full_word(1000, 'hello')
     word_table.add_full_word(1001, 'bye')
+    word_table.add_full_word(1002, 'town')
     table_factory('search_name',
                   'place_id BIGINT, name_vector INT[], nameaddress_vector INT[]',
-                  [(12, [1000], [1001])])
+                  [(12, [1000], [1001]), (13, [1001], [1002]), (14, [1000, 1001], [1002])])
     tok = tokenizer_factory()
 
     tok.update_statistics(test_config)
 
-    assert temp_db_cursor.scalar("""SELECT count(*) FROM word
-                                    WHERE type = 'W' and word_id = 1000 and
-                                          (info->>'count')::int > 0""") == 1
-    assert temp_db_cursor.scalar("""SELECT count(*) FROM word
-                                    WHERE type = 'W' and word_id = 1001 and
-                                          (info->>'addr_count')::int > 0""") == 1
+    assert temp_db_cursor.row_set("""SELECT word_id,
+                                            (info->>'count')::int,
+                                            (info->>'addr_count')::int
+                                     FROM word
+                                     WHERE type = 'W'""") == \
+        {(1000, 2, None), (1001, 2, None), (1002, None, 2)}
 
 
 def test_normalize_postcode(analyzer):
diff --git a/test/python/tokenizer/token_analysis/test_generic.py b/test/python/tokenizer/token_analysis/test_generic.py
index 02870f24..48f2483b 100644
--- a/test/python/tokenizer/token_analysis/test_generic.py
+++ b/test/python/tokenizer/token_analysis/test_generic.py
@@ -40,7 +40,7 @@ def make_analyser(*variants, variant_only=False):
 
 def get_normalized_variants(proc, name):
     norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
-    return proc.compute_variants(norm.transliterate(name).strip())
+    return proc.compute_variants(norm.transliterate(name).strip())[0]
 
 
 def test_no_variants():
diff --git a/test/python/tokenizer/token_analysis/test_generic_mutation.py b/test/python/tokenizer/token_analysis/test_generic_mutation.py
index 2ce2236a..e0507e4c 100644
--- a/test/python/tokenizer/token_analysis/test_generic_mutation.py
+++ b/test/python/tokenizer/token_analysis/test_generic_mutation.py
@@ -40,7 +40,7 @@ class TestMutationNoVariants:
 
     def variants(self, name):
         norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
-        return set(self.analysis.compute_variants(norm.transliterate(name).strip()))
+        return set(self.analysis.compute_variants(norm.transliterate(name).strip())[0])
 
     @pytest.mark.parametrize('pattern', ('(capture)', ['a list']))
     def test_bad_pattern(self, pattern):