From c41f2fed2133668dc3179813261d39d3ff69cbdd Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 5 Dec 2023 16:07:56 +0100 Subject: [PATCH] simplify weigh_search() function Use JSON arrays which can have mixed types and therefore have a more logical structure than separate arrays. Avoid JSON dicts because of their verboseness. --- lib-sql/functions/ranking.sql | 14 ++++++-------- nominatim/api/search/db_search_fields.py | 17 ++++++++++++----- nominatim/utils/json_writer.py | 4 ++-- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/lib-sql/functions/ranking.sql b/lib-sql/functions/ranking.sql index 0b18954c..97a0cde3 100644 --- a/lib-sql/functions/ranking.sql +++ b/lib-sql/functions/ranking.sql @@ -287,21 +287,19 @@ LANGUAGE plpgsql IMMUTABLE; CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[], - term_vectors TEXT[], - weight_vectors FLOAT[], + rankings TEXT, def_weight FLOAT) RETURNS FLOAT AS $$ DECLARE - pos INT := 1; - terms TEXT; + rank JSON; BEGIN - FOREACH terms IN ARRAY term_vectors + FOR rank IN + SELECT * FROM json_array_elements(rankings::JSON) LOOP - IF search_vector @> terms::INTEGER[] THEN - RETURN weight_vectors[pos]; + IF true = ALL(SELECT x::int = ANY(search_vector) FROM json_array_elements_text(rank->1) as x) THEN + RETURN (rank->>0)::float; END IF; - pos := pos + 1; END LOOP; RETURN def_weight; END; diff --git a/nominatim/api/search/db_search_fields.py b/nominatim/api/search/db_search_fields.py index 52693e95..324a7acc 100644 --- a/nominatim/api/search/db_search_fields.py +++ b/nominatim/api/search/db_search_fields.py @@ -14,6 +14,7 @@ import sqlalchemy as sa from nominatim.typing import SaFromClause, SaColumn, SaExpression from nominatim.api.search.query import Token +from nominatim.utils.json_writer import JsonWriter @dataclasses.dataclass class WeightedStrings: @@ -128,11 +129,17 @@ class FieldRanking: """ assert self.rankings - return sa.func.weigh_search(table.c[self.column], - [f"{{{','.join((str(s) for s in 
r.tokens))}}}" - for r in self.rankings], - [r.penalty for r in self.rankings], - self.default) + rout = JsonWriter().start_array() + for rank in self.rankings: + rout.start_array().value(rank.penalty).next() + rout.start_array() + for token in rank.tokens: + rout.value(token).next() + rout.end_array() + rout.end_array().next() + rout.end_array() + + return sa.func.weigh_search(table.c[self.column], rout(), self.default) @dataclasses.dataclass diff --git a/nominatim/utils/json_writer.py b/nominatim/utils/json_writer.py index bb642233..fcc355d5 100644 --- a/nominatim/utils/json_writer.py +++ b/nominatim/utils/json_writer.py @@ -76,8 +76,8 @@ class JsonWriter: def end_array(self) -> 'JsonWriter': """ Write the closing bracket of a JSON array. """ - assert self.pending in (',', '[', '') - if self.pending == '[': + assert self.pending in (',', '[', ']', '}', '') + if self.pending not in (',', ''): self.data.write(self.pending) self.pending = ']' return self -- 2.39.5