git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/db/sqlalchemy_functions.py
reintroduce cutoffs when searching for very frequent words
[nominatim.git] / nominatim / db / sqlalchemy_functions.py
index cb04f7626f08b97f2ee602900849e132f65f6272..f576d32f06a175191823e2375dfb29a57630966b 100644 (file)
@@ -29,7 +29,7 @@ class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
 
 
 @compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
-def _default_intersects(element: SaColumn,
+def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
                         compiler: 'sa.Compiled', **kw: Any) -> str:
     return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
             " AND placex.rank_address between 4 and 25"
@@ -40,7 +40,7 @@ def _default_intersects(element: SaColumn,
 
 
 @compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
-def _sqlite_intersects(element: SaColumn,
+def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
     return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
             " AND placex.rank_address between 4 and 25"
@@ -55,13 +55,13 @@ class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
     inherit_cache = True
 
     def __init__(self, table: sa.Table, geom: SaColumn) -> None:
-        super().__init__(table.c.geometry, # type: ignore[no-untyped-call]
+        super().__init__(table.c.geometry,
                          table.c.rank_search, geom)
         self.tablename = table.name
 
 
 @compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
-def default_reverse_place_diameter(element: SaColumn,
+def default_reverse_place_diameter(element: IntersectsReverseDistance,
                                    compiler: 'sa.Compiled', **kw: Any) -> str:
     table = element.tablename
     return f"({table}.rank_address between 4 and 25"\
@@ -74,7 +74,7 @@ def default_reverse_place_diameter(element: SaColumn,
 
 
 @compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_reverse_place_diameter(element: SaColumn,
+def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
                                   compiler: 'sa.Compiled', **kw: Any) -> str:
     geom1, rank, geom2 = list(element.clauses)
     table = element.tablename
@@ -102,7 +102,7 @@ class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
 
 
 @compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
-def default_is_below_reverse_distance(element: SaColumn,
+def default_is_below_reverse_distance(element: IsBelowReverseDistance,
                                       compiler: 'sa.Compiled', **kw: Any) -> str:
     dist, rank = list(element.clauses)
     return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
@@ -110,36 +110,24 @@ def default_is_below_reverse_distance(element: SaColumn,
 
 
 @compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_is_below_reverse_distance(element: SaColumn,
+def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
                                      compiler: 'sa.Compiled', **kw: Any) -> str:
     dist, rank = list(element.clauses)
     return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
                                                   compiler.process(rank, **kw))
 
 
-def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
-    """ Create an expression with the necessary conditions over a placex
-        table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
-        can be used.
-    """
-    return sa.text(f"{table}.rank_address between 4 and 25"
-                   f" AND {table}.type != 'postcode'"
-                   f" AND {table}.name is not null"
-                   f" AND {table}.linked_place_id is null"
-                   f" AND {table}.osm_type = 'N'")
-
-
 class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
     name = 'IsAddressPoint'
     inherit_cache = True
 
     def __init__(self, table: sa.Table) -> None:
-        super().__init__(table.c.rank_address, # type: ignore[no-untyped-call]
+        super().__init__(table.c.rank_address,
                          table.c.housenumber, table.c.name)
 
 
 @compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
-def default_is_address_point(element: SaColumn,
+def default_is_address_point(element: IsAddressPoint,
                              compiler: 'sa.Compiled', **kw: Any) -> str:
     rank, hnr, name = list(element.clauses)
     return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
@@ -149,7 +137,7 @@ def default_is_address_point(element: SaColumn,
 
 
 @compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_is_address_point(element: SaColumn,
+def sqlite_is_address_point(element: IsAddressPoint,
                             compiler: 'sa.Compiled', **kw: Any) -> str:
     rank, hnr, name = list(element.clauses)
     return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
@@ -166,7 +154,7 @@ class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
     inherit_cache = True
 
 @compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
-def compile_crosscheck_names(element: SaColumn,
+def compile_crosscheck_names(element: CrosscheckNames,
                              compiler: 'sa.Compiled', **kw: Any) -> str:
     arg1, arg2 = list(element.clauses)
     return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
@@ -174,7 +162,7 @@ def compile_crosscheck_names(element: SaColumn,
 
 
 @compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
-def compile_sqlite_crosscheck_names(element: SaColumn,
+def compile_sqlite_crosscheck_names(element: CrosscheckNames,
                                     compiler: 'sa.Compiled', **kw: Any) -> str:
     arg1, arg2 = list(element.clauses)
     return "EXISTS(SELECT *"\
@@ -191,15 +179,16 @@ class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
 
 
 @compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
-def default_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
     return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
 
 
 @compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
     return "json_each(%s)" % compiler.process(element.clauses, **kw)
 
 
+
 class Greatest(sa.sql.functions.GenericFunction[Any]):
     """ Function to compute maximum of all its input parameters.
     """
@@ -208,5 +197,25 @@ class Greatest(sa.sql.functions.GenericFunction[Any]):
 
 
 @compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
-def sqlite_greatest(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
     return "max(%s)" % compiler.process(element.clauses, **kw)
+
+
+
+class RegexpWord(sa.sql.functions.GenericFunction[Any]):
+    """ Check if a full word is in a given string.
+    """
+    name = 'RegexpWord'
+    inherit_cache = True
+
+
+@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
+def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "%s ~* ('\\m(' || %s  || ')\\M')::text" % (compiler.process(arg2, **kw), compiler.process(arg1, **kw))
+
+
+@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "regexp('\\b(' || %s  || ')\\b', %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))