git.openstreetmap.org Git - nominatim.git/commitdiff
simplify weigh_search() function
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 5 Dec 2023 15:07:56 +0000 (16:07 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 7 Dec 2023 08:31:00 +0000 (09:31 +0100)
Use JSON arrays, which can hold mixed types and therefore allow
a more logical structure than separate arrays. Avoid JSON dicts
because of their verbosity.

lib-sql/functions/ranking.sql
nominatim/api/search/db_search_fields.py
nominatim/utils/json_writer.py

index 0b18954cedb985ab71430b20762958f7571dd6da..97a0cde38e2b6aa6ef8aebf63af2611894502fbd 100644 (file)
@@ -287,21 +287,19 @@ LANGUAGE plpgsql IMMUTABLE;
 
 
 CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
-                                        term_vectors TEXT[],
-                                        weight_vectors FLOAT[],
+                                        rankings TEXT,
                                         def_weight FLOAT)
   RETURNS FLOAT
   AS $$
 DECLARE
-  pos INT := 1;
-  terms TEXT;
+  rank JSON;
 BEGIN
-  FOREACH terms IN ARRAY term_vectors
+  FOR rank IN
+    SELECT * FROM json_array_elements(rankings::JSON)
   LOOP
-    IF search_vector @> terms::INTEGER[] THEN
-      RETURN weight_vectors[pos];
+    IF true = ALL(SELECT x::int = ANY(search_vector) FROM json_array_elements_text(rank->1) as x) THEN
+      RETURN (rank->>0)::float;
     END IF;
-    pos := pos + 1;
   END LOOP;
   RETURN def_weight;
 END;
index 52693e95fce673026d97c545bc70b37ad52a17cf..324a7acc2cafe5a553dc60fdb6f5ca1b948568ae 100644 (file)
@@ -14,6 +14,7 @@ import sqlalchemy as sa
 
 from nominatim.typing import SaFromClause, SaColumn, SaExpression
 from nominatim.api.search.query import Token
+from nominatim.utils.json_writer import JsonWriter
 
 @dataclasses.dataclass
 class WeightedStrings:
@@ -128,11 +129,17 @@ class FieldRanking:
         """
         assert self.rankings
 
-        return sa.func.weigh_search(table.c[self.column],
-                                    [f"{{{','.join((str(s) for s in r.tokens))}}}"
-                                     for r in self.rankings],
-                                    [r.penalty for r in self.rankings],
-                                    self.default)
+        rout = JsonWriter().start_array()
+        for rank in self.rankings:
+            rout.start_array().value(rank.penalty).next()
+            rout.start_array()
+            for token in rank.tokens:
+                rout.value(token).next()
+            rout.end_array()
+            rout.end_array().next()
+        rout.end_array()
+
+        return sa.func.weigh_search(table.c[self.column], rout(), self.default)
 
 
 @dataclasses.dataclass
index bb642233e78d8c4234afb7e3bc54c4cd0f69cd8b..fcc355d5eee9fa2331ad47e47eb2a4ac18dac078 100644 (file)
@@ -76,8 +76,8 @@ class JsonWriter:
     def end_array(self) -> 'JsonWriter':
         """ Write the closing bracket of a JSON array.
         """
-        assert self.pending in (',', '[', '')
-        if self.pending == '[':
+        assert self.pending in (',', '[', ']', ')', '')
+        if self.pending not in (',', ''):
             self.data.write(self.pending)
         self.pending = ']'
         return self