From: Sarah Hoffmann Date: Wed, 19 Mar 2025 15:00:52 +0000 (+0100) Subject: Merge pull request #3678 from lonvia/search-tweaks X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/e4295dba10bdb05045e35c772b3d8ca3cb042fd1?hp=72d3360fa2c0789e4741c2e078524ee5e17d8fbc Merge pull request #3678 from lonvia/search-tweaks Some minor tweaks to postcode parsing in query --- diff --git a/src/nominatim_api/search/icu_tokenizer.py b/src/nominatim_api/search/icu_tokenizer.py index b3e14f6a..1bd0030d 100644 --- a/src/nominatim_api/search/icu_tokenizer.py +++ b/src/nominatim_api/search/icu_tokenizer.py @@ -193,10 +193,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): self.add_extra_tokens(query) for start, end, pc in self.postcode_parser.parse(query): + term = ' '.join(n.term_lookup for n in query.nodes[start + 1:end + 1]) query.add_token(qmod.TokenRange(start, end), qmod.TOKEN_POSTCODE, ICUToken(penalty=0.1, token=0, count=1, addr_count=1, - lookup_word=pc, word_token=pc, info=None)) + lookup_word=pc, word_token=term, + info=None)) self.rerank_tokens(query) log().table_dump('Word tokens', _dump_word_tokens(query)) @@ -267,10 +269,10 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): """ for i, node, tlist in query.iter_token_lists(): if tlist.ttype == qmod.TOKEN_POSTCODE: + tlen = len(cast(ICUToken, tlist.tokens[0]).word_token) for repl in node.starting: if repl.end == tlist.end and repl.ttype != qmod.TOKEN_POSTCODE \ - and (repl.ttype != qmod.TOKEN_HOUSENUMBER - or len(tlist.tokens[0].lookup_word) > 4): + and (repl.ttype != qmod.TOKEN_HOUSENUMBER or tlen > 4): repl.add_penalty(0.39) elif (tlist.ttype == qmod.TOKEN_HOUSENUMBER and len(tlist.tokens[0].lookup_word) <= 3): diff --git a/src/nominatim_api/search/token_assignment.py b/src/nominatim_api/search/token_assignment.py index 3ca9385c..8d25aa8f 100644 --- a/src/nominatim_api/search/token_assignment.py +++ b/src/nominatim_api/search/token_assignment.py @@ -269,10 +269,9 @@ class _TokenSequence: # <address>,<postcode> should give preference to address search if base.postcode.start == 0: penalty = self.penalty - self.direction = -1 # name searches are only possible backwards else: penalty = self.penalty + 0.1 - self.direction = 1 # name searches are only possible forwards + penalty += 0.1 * max(0, len(base.address) - 1) yield dataclasses.replace(base, penalty=penalty) def _get_assignments_address_forward(self, base: TokenAssignment, @@ -282,6 +281,11 @@ class _TokenSequence: """ first = base.address[0] + # The postcode must come after the name. + if base.postcode and base.postcode < first: + log().var_dump('skip forward', (base.postcode, first)) + return + log().comment('first word = name') yield dataclasses.replace(base, penalty=self.penalty, name=first, address=base.address[1:]) @@ -317,7 +321,12 @@ class _TokenSequence: """ last = base.address[-1] - if self.direction == -1 or len(base.address) > 1: + # The postcode must come before the name for backward direction. + if base.postcode and base.postcode > last: + log().var_dump('skip backward', (base.postcode, last)) + return + + if self.direction == -1 or len(base.address) > 1 or base.postcode: log().comment('last word = name') yield dataclasses.replace(base, penalty=self.penalty, name=last, address=base.address[:-1])