Merge pull request #3342 from mtmail/tyops

author Sarah Hoffmann <lonvia@denofr.de>

Wed, 28 Feb 2024 13:25:16 +0000 (14:25 +0100)

committer GitHub <noreply@github.com>

Wed, 28 Feb 2024 13:25:16 +0000 (14:25 +0100)
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 28 Feb 2024 13:25:16 +0000 (14:25 +0100)
committer GitHub <noreply@github.com>
Wed, 28 Feb 2024 13:25:16 +0000 (14:25 +0100)
diff --combined nominatim/api/results.py

index fa861bf3fafbe0a90d736e19491bf83c0cd317e6,8e83d52370fbb83a0715b5f5f694581fd5bf8754..d66da6c6f5c1df705075989136da50a23c214972
--- 1/nominatim/api/results.py
--- 2/nominatim/api/results.py
+++ b/nominatim/api/results.py
@@@ -233,7 -233,7 +233,7 @@@ class BaseResult
               of the value or an artificial value computed from the place's
               search rank.
           """
- -        return self.importance or (0.7500001 - (self.rank_search/40.0))
+ +        return self.importance or (0.40001 - (self.rank_search/75.0))
   
   
       def localize(self, locales: Locales) -> None:
@@@ -466,7 -466,7 +466,7 @@@ async def add_result_details(conn: Sear
   
   
   def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
-     """ Create a new AddressLine from the results of a datbase query.
+     """ Create a new AddressLine from the results of a database query.
       """
       extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
       if 'linked_place' in extratags:
diff --combined nominatim/api/search/db_search_fields.py

index 846e1ce28dc4589969d0c25942fbb025741daa73,aa3a2dad301b703307167541f110368a364565b4..cd5717753ba722616084dc06bbfb76dda901c0fb
--- 1/nominatim/api/search/db_search_fields.py
--- 2/nominatim/api/search/db_search_fields.py
+++ b/nominatim/api/search/db_search_fields.py
@@@ -94,7 -94,7 +94,7 @@@ class RankedTokens
   
       def with_token(self, t: Token, transition_penalty: float) -> 'RankedTokens':
           """ Create a new RankedTokens list with the given token appended.
-             The tokens penalty as well as the given transision penalty
+             The tokens penalty as well as the given transition penalty
               are added to the overall penalty.
           """
           return RankedTokens(self.penalty + t.penalty + transition_penalty,
@@@ -199,7 -199,8 +199,7 @@@ class SearchData
               categories: Dict[Tuple[str, str], float] = {}
               min_penalty = 1000.0
               for t in tokens:
- -                if t.penalty < min_penalty:
- -                    min_penalty = t.penalty
+ +                min_penalty = min(min_penalty, t.penalty)
                   cat = t.get_category()
                   if t.penalty < categories.get(cat, 1000.0):
                       categories[cat] = t.penalty
diff --combined nominatim/api/search/db_searches.py

index b3aed35fdf77cba3beed58ad89342ff499e0293d,742f4a70f60aedf00ca5a92bc2a4e6adbd917c7a..be883953276ccf74eed7497d0ece6caf9a275498
--- 1/nominatim/api/search/db_searches.py
--- 2/nominatim/api/search/db_searches.py
+++ b/nominatim/api/search/db_searches.py
@@@ -5,7 -5,7 +5,7 @@@
   # Copyright (C) 2023 by the Nominatim developer community.
   # For a full list of authors see the git log.
   """
- Implementation of the acutal database accesses for forward search.
+ Implementation of the actual database accesses for forward search.
   """
   from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
   import abc
@@@ -700,7 -700,7 +700,7 @@@ class PlaceSearch(AbstractSearch)
                  or (details.viewbox is not None and details.viewbox.area < 0.5):
                   sql = sql.order_by(
                           penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
- -                                    else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
+ +                                    else_=0.40001-(sa.cast(tsearch.c.search_rank, sa.Float())/75)))
               sql = sql.add_columns(t.c.importance)
   
   
diff --combined nominatim/db/sql_preprocessor.py

index d3fb3058669f67fe5dc184900fc6aa9bf0705996,839f682d8a8e5db722657751e6b9e4051db9d002..af5bc3357959abf52b9518e83144403e2150564b
--- 1/nominatim/db/sql_preprocessor.py
--- 2/nominatim/db/sql_preprocessor.py
+++ b/nominatim/db/sql_preprocessor.py
@@@ -7,7 -7,7 +7,7 @@@
   """
   Preprocessing of SQL files.
   """
- -from typing import Set, Dict, Any
+ +from typing import Set, Dict, Any, cast
   import jinja2
   
   from nominatim.db.connection import Connection
@@@ -28,24 -28,13 +28,24 @@@ def _get_partitions(conn: Connection) -
   
   def _get_tables(conn: Connection) -> Set[str]:
       """ Return the set of tables currently in use.
- -        Only includes non-partitioned
       """
       with conn.cursor() as cur:
           cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
   
           return set((row[0] for row in list(cur)))
   
+ +def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
+ +    """ Returns the version of the slim middle tables.
+ +    """
+ +    if 'osm2pgsql_properties' not in tables:
+ +        return '1'
+ +
+ +    with conn.cursor() as cur:
+ +        cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
+ +        row = cur.fetchone()
+ +
+ +        return cast(str, row[0]) if row is not None else '1'
+ +
   
   def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
       """ Returns a dict with tablespace expressions for the different tablespace
@@@ -95,7 -84,6 +95,7 @@@ class SQLPreprocessor
           db_info['tables'] = _get_tables(conn)
           db_info['reverse_only'] = 'search_name' not in db_info['tables']
           db_info['tablespace'] = _setup_tablespace_sql(config)
+ +        db_info['middle_db_format'] = _get_middle_db_format(conn, db_info['tables'])
   
           self.env.globals['config'] = config
           self.env.globals['db'] = db_info
@@@ -127,7 -115,7 +127,7 @@@
   
       def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
                                 **kwargs: Any) -> None:
-         """ Execure the given SQL files using parallel asynchronous connections.
+         """ Execute the given SQL files using parallel asynchronous connections.
               The keyword arguments may supply additional parameters for
               preprocessing.
   
diff --combined test/bdd/steps/steps_db_ops.py

index e6122bfe4050dd9093f56b76f7486e3196311239,493b40cc333c57610487d4ac2f76ddf33684e686..c30ee894280d4eb912a325d6669b0148e2c35d7c
--- 1/test/bdd/steps/steps_db_ops.py
--- 2/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@@ -52,52 -52,33 +52,52 @@@ def add_data_to_planet_relations(contex
           for tests on data that looks up members.
       """
       with context.db.cursor() as cur:
- -        for r in context.table:
- -            last_node = 0
- -            last_way = 0
- -            parts = []
- -            if r['members']:
- -                members = []
- -                for m in r['members'].split(','):
- -                    mid = NominatimID(m)
- -                    if mid.typ == 'N':
- -                        parts.insert(last_node, int(mid.oid))
- -                        last_node += 1
- -                        last_way += 1
- -                    elif mid.typ == 'W':
- -                        parts.insert(last_way, int(mid.oid))
- -                        last_way += 1
- -                    else:
- -                        parts.append(int(mid.oid))
- -
- -                    members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
- -            else:
- -                members = None
- -
- -            tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
- -
- -            cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
- -                           VALUES (%s, %s, %s, %s, %s, %s)""",
- -                        (r['id'], last_node, last_way, parts, members, list(tags)))
+ +        cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
+ +        row = cur.fetchone()
+ +        if row is None or row[0] == '1':
+ +            for r in context.table:
+ +                last_node = 0
+ +                last_way = 0
+ +                parts = []
+ +                if r['members']:
+ +                    members = []
+ +                    for m in r['members'].split(','):
+ +                        mid = NominatimID(m)
+ +                        if mid.typ == 'N':
+ +                            parts.insert(last_node, int(mid.oid))
+ +                            last_node += 1
+ +                            last_way += 1
+ +                        elif mid.typ == 'W':
+ +                            parts.insert(last_way, int(mid.oid))
+ +                            last_way += 1
+ +                        else:
+ +                            parts.append(int(mid.oid))
+ +
+ +                        members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
+ +                else:
+ +                    members = None
+ +
+ +                tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
+ +
+ +                cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
+ +                               VALUES (%s, %s, %s, %s, %s, %s)""",
+ +                            (r['id'], last_node, last_way, parts, members, list(tags)))
+ +        else:
+ +            for r in context.table:
+ +                if r['members']:
+ +                    members = []
+ +                    for m in r['members'].split(','):
+ +                        mid = NominatimID(m)
+ +                        members.append({'ref': mid.oid, 'role': mid.cls or '', 'type': mid.typ})
+ +                else:
+ +                    members = []
+ +
+ +                tags = {h[5:]: r[h] for h in r.headings if h.startswith("tags+")}
+ +
+ +                cur.execute("""INSERT INTO planet_osm_rels (id, tags, members)
+ +                               VALUES (%s, %s, %s)""",
+ +                            (r['id'], psycopg2.extras.Json(tags),
+ +                             psycopg2.extras.Json(members)))
   
   @given("the ways")
   def add_data_to_planet_ways(context):
@@@ -105,19 -86,12 +105,19 @@@
           tests on that that looks up node ids in this table.
       """
       with context.db.cursor() as cur:
+ +        cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
+ +        row = cur.fetchone()
+ +        json_tags = row is not None and row[0] != '1'
           for r in context.table:
- -            tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
+ +            if json_tags:
+ +                tags = psycopg2.extras.Json({h[5:]: r[h] for h in r.headings if h.startswith("tags+")})
+ +            else:
+ +                tags = list(chain.from_iterable([(h[5:], r[h])
+ +                                                 for h in r.headings if h.startswith("tags+")]))
               nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
   
               cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
- -                        (r['id'], nodes, list(tags)))
+ +                        (r['id'], nodes, tags))
   
   ################################ WHEN ##################################
   
@@@ -190,7 -164,7 +190,7 @@@ def delete_places(context, oids)
   def check_place_contents(context, table, exact):
       """ Check contents of place/placex tables. Each row represents a table row
           and all data must match. Data not present in the expected table, may
-         be arbitry. The rows are identified via the 'object' column which must
+         be arbitrary. The rows are identified via the 'object' column which must
           have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
           rows match (for example because 'class' was left out and there are
           multiple entries for the given OSM object) then all must match. All
@@@ -237,7 -211,7 +237,7 @@@ def check_place_has_entry(context, tabl
   def check_search_name_contents(context, exclude):
       """ Check contents of place/placex tables. Each row represents a table row
           and all data must match. Data not present in the expected table, may
-         be arbitry. The rows are identified via the 'object' column which must
+         be arbitrary. The rows are identified via the 'object' column which must
           have an identifier of the form '<NRW><osm id>[:<class>]'. All
           expected rows are expected to be present with at least one database row.
       """
@@@ -286,7 -260,7 +286,7 @@@ def check_search_name_has_entry(context
   def check_location_postcode(context):
       """ Check full contents for location_postcode table. Each row represents a table row
           and all data must match. Data not present in the expected table, may
-         be arbitry. The rows are identified via 'country' and 'postcode' columns.
+         be arbitrary. The rows are identified via 'country' and 'postcode' columns.
           All rows must be present as excepted and there must not be additional
           rows.
       """
@@@ -343,7 -317,7 +343,7 @@@ def check_word_table_for_postcodes(cont
   def check_place_addressline(context):
       """ Check the contents of the place_addressline table. Each row represents
           a table row and all data must match. Data not present in the expected
-         table, may be arbitry. The rows are identified via the 'object' column,
+         table, may be arbitrary. The rows are identified via the 'object' column,
           representing the addressee and the 'address' column, representing the
           address item.
       """
@@@ -410,7 -384,7 +410,7 @@@ def check_location_property_osmline(con
   def check_place_contents(context, exact):
       """ Check contents of the interpolation table. Each row represents a table row
           and all data must match. Data not present in the expected table, may
-         be arbitry. The rows are identified via the 'object' column which must
+         be arbitrary. The rows are identified via the 'object' column which must
           have an identifier of the form '<osm id>[:<startnumber>]'. When multiple
           rows match (for example because 'startnumber' was left out and there are
           multiple entries for the given OSM object) then all must match. All
author	Sarah Hoffmann <lonvia@denofr.de>
	Wed, 28 Feb 2024 13:25:16 +0000 (14:25 +0100)
committer	GitHub <noreply@github.com>
	Wed, 28 Feb 2024 13:25:16 +0000 (14:25 +0100)
		1	2
nominatim/api/results.py	patch \|	diff1 \|	diff2 \|	blob \| history
nominatim/api/search/db_search_fields.py	patch \|	diff1 \|	diff2 \|	blob \| history
nominatim/api/search/db_searches.py	patch \|	diff1 \|	diff2 \|	blob \| history
nominatim/db/sql_preprocessor.py	patch \|	diff1 \|	diff2 \|	blob \| history
test/bdd/steps/steps_db_ops.py	patch \|	diff1 \|	diff2 \|	blob \| history