git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/db/sqlalchemy_functions.py
reintroduce cutoffs when searching for very frequent words
[nominatim.git] / nominatim / db / sqlalchemy_functions.py
index 6a5809bdd3de3df069ef58314b22adc52f04117a..f576d32f06a175191823e2375dfb29a57630966b 100644 (file)
@@ -17,20 +17,19 @@ from nominatim.typing import SaColumn
 
 # pylint: disable=all
 
-class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[bool]):
+class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
     """ Check for conditions that allow partial index use on
         'idx_placex_geometry_reverse_lookupPolygon'.
 
         Needs to be constant, so that the query planner picks them up correctly
         in prepared statements.
     """
-    type = sa.Boolean()
     name = 'PlacexGeometryReverseLookuppolygon'
     inherit_cache = True
 
 
 @compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
-def _default_intersects(element: SaColumn,
+def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
                         compiler: 'sa.Compiled', **kw: Any) -> str:
     return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
             " AND placex.rank_address between 4 and 25"
@@ -41,7 +40,7 @@ def _default_intersects(element: SaColumn,
 
 
 @compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
-def _sqlite_intersects(element: SaColumn,
+def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
     return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
             " AND placex.rank_address between 4 and 25"
@@ -51,37 +50,36 @@ def _sqlite_intersects(element: SaColumn,
             " AND placex.linked_place_id is null)")
 
 
-class IntersectsReverseDistance(sa.sql.functions.GenericFunction[bool]):
-    type = sa.Boolean()
+class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
     name = 'IntersectsReverseDistance'
     inherit_cache = True
 
     def __init__(self, table: sa.Table, geom: SaColumn) -> None:
-        super().__init__(table.c.geometry, # type: ignore[no-untyped-call]
+        super().__init__(table.c.geometry,
                          table.c.rank_search, geom)
         self.tablename = table.name
 
 
 @compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
-def default_reverse_place_diameter(element: SaColumn,
+def default_reverse_place_diameter(element: IntersectsReverseDistance,
                                    compiler: 'sa.Compiled', **kw: Any) -> str:
     table = element.tablename
-    return f"{table}.rank_address between 4 and 25"\
+    return f"({table}.rank_address between 4 and 25"\
            f" AND {table}.type != 'postcode'"\
            f" AND {table}.name is not null"\
            f" AND {table}.linked_place_id is null"\
            f" AND {table}.osm_type = 'N'" + \
-           " AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s" % \
+           " AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s)" % \
                tuple(map(lambda c: compiler.process(c, **kw), element.clauses))
 
 
 @compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_reverse_place_diameter(element: SaColumn,
+def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
                                   compiler: 'sa.Compiled', **kw: Any) -> str:
     geom1, rank, geom2 = list(element.clauses)
     table = element.tablename
 
-    return (f"{table}.rank_address between 4 and 25"\
+    return (f"({table}.rank_address between 4 and 25"\
             f" AND {table}.type != 'postcode'"\
             f" AND {table}.name is not null"\
             f" AND {table}.linked_place_id is null"\
@@ -91,21 +89,20 @@ def sqlite_reverse_place_diameter(element: SaColumn,
              " (SELECT place_id FROM placex_place_node_areas"\
              "  WHERE ROWID IN (SELECT ROWID FROM SpatialIndex"\
              "  WHERE f_table_name = 'placex_place_node_areas'"\
-             "  AND search_frame = %s))") % (
+             "  AND search_frame = %s)))") % (
                 compiler.process(geom1, **kw),
                 compiler.process(geom2, **kw),
                 compiler.process(rank, **kw),
                 compiler.process(geom2, **kw))
 
 
-class IsBelowReverseDistance(sa.sql.functions.GenericFunction[bool]):
-    type = sa.Boolean()
+class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
     name = 'IsBelowReverseDistance'
     inherit_cache = True
 
 
 @compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
-def default_is_below_reverse_distance(element: SaColumn,
+def default_is_below_reverse_distance(element: IsBelowReverseDistance,
                                       compiler: 'sa.Compiled', **kw: Any) -> str:
     dist, rank = list(element.clauses)
     return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
@@ -113,35 +110,51 @@ def default_is_below_reverse_distance(element: SaColumn,
 
 
 @compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_is_below_reverse_distance(element: SaColumn,
+def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
                                      compiler: 'sa.Compiled', **kw: Any) -> str:
     dist, rank = list(element.clauses)
     return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
                                                   compiler.process(rank, **kw))
 
 
-def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
-    """ Create an expression with the necessary conditions over a placex
-        table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
-        can be used.
-    """
-    return sa.text(f"{table}.rank_address between 4 and 25"
-                   f" AND {table}.type != 'postcode'"
-                   f" AND {table}.name is not null"
-                   f" AND {table}.linked_place_id is null"
-                   f" AND {table}.osm_type = 'N'")
+class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
+    name = 'IsAddressPoint'
+    inherit_cache = True
+
+    def __init__(self, table: sa.Table) -> None:
+        super().__init__(table.c.rank_address,
+                         table.c.housenumber, table.c.name)
+
+
+@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
+def default_is_address_point(element: IsAddressPoint,
+                             compiler: 'sa.Compiled', **kw: Any) -> str:
+    rank, hnr, name = list(element.clauses)
+    return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
+                compiler.process(rank, **kw),
+                compiler.process(hnr, **kw),
+                compiler.process(name, **kw))
 
 
-class CrosscheckNames(sa.sql.functions.GenericFunction[bool]):
+@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_is_address_point(element: IsAddressPoint,
+                            compiler: 'sa.Compiled', **kw: Any) -> str:
+    rank, hnr, name = list(element.clauses)
+    return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
+                compiler.process(rank, **kw),
+                compiler.process(hnr, **kw),
+                compiler.process(name, **kw))
+
+
+class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
     """ Check if in the given list of names in parameters 1 any of the names
         from the JSON array in parameter 2 are contained.
     """
-    type = sa.Boolean()
     name = 'CrosscheckNames'
     inherit_cache = True
 
 @compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
-def compile_crosscheck_names(element: SaColumn,
+def compile_crosscheck_names(element: CrosscheckNames,
                              compiler: 'sa.Compiled', **kw: Any) -> str:
     arg1, arg2 = list(element.clauses)
     return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
@@ -149,7 +162,7 @@ def compile_crosscheck_names(element: SaColumn,
 
 
 @compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
-def compile_sqlite_crosscheck_names(element: SaColumn,
+def compile_sqlite_crosscheck_names(element: CrosscheckNames,
                                     compiler: 'sa.Compiled', **kw: Any) -> str:
     arg1, arg2 = list(element.clauses)
     return "EXISTS(SELECT *"\
@@ -166,30 +179,15 @@ class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
 
 
 @compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
-def default_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
     return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
 
 
 @compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
     return "json_each(%s)" % compiler.process(element.clauses, **kw)
 
 
-class JsonHasKey(sa.sql.functions.GenericFunction[bool]):
-    """ Return elements of a json array as a set.
-    """
-    type = sa.Boolean()
-    name = 'JsonHasKey'
-    inherit_cache = True
-
-
-@compiles(JsonHasKey) # type: ignore[no-untyped-call, misc]
-def compile_json_has_key(element: SaColumn,
-                         compiler: 'sa.Compiled', **kw: Any) -> str:
-    arg1, arg2 = list(element.clauses)
-    return "%s->%s is not null" % (compiler.process(arg1, **kw),
-                                   compiler.process(arg2, **kw))
-
 
 class Greatest(sa.sql.functions.GenericFunction[Any]):
     """ Function to compute maximum of all its input parameters.
@@ -199,5 +197,25 @@ class Greatest(sa.sql.functions.GenericFunction[Any]):
 
 
 @compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_greatest(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
     return "max(%s)" % compiler.process(element.clauses, **kw)
+
+
+
+class RegexpWord(sa.sql.functions.GenericFunction[Any]):
+    """ Check if a full word is in a given string.
+    """
+    name = 'RegexpWord'
+    inherit_cache = True
+
+
+@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
+def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "%s ~* ('\\m(' || %s  || ')\\M')::text" % (compiler.process(arg2, **kw), compiler.process(arg1, **kw))
+
+
+@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "regexp('\\b(' || %s  || ')\\b', %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))