X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/146a0b29c029b771728fa11164dd49df713611f3..d48ea4f22cd60206613c193dd6aa2dd5e92029bd:/nominatim/api/search/token_assignment.py diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py index 747fea6c..edddd100 100644 --- a/nominatim/api/search/token_assignment.py +++ b/nominatim/api/search/token_assignment.py @@ -257,6 +257,26 @@ class _TokenSequence: return True + def _get_assignments_postcode(self, base: TokenAssignment, + query_len: int) -> Iterator[TokenAssignment]: + """ Yield possible assignments of Postcode searches with an + address component. + """ + assert base.postcode is not None + + if (base.postcode.start == 0 and self.direction != -1)\ + or (base.postcode.end == query_len and self.direction != 1): + log().comment('postcode search') + # <address>
,<postcode> should give preference to address search + if base.postcode.start == 0: + penalty = self.penalty + self.direction = -1 # name searches are only possible backwards + else: + penalty = self.penalty + 0.1 + self.direction = 1 # name searches are only possible forwards + yield dataclasses.replace(base, penalty=penalty) + + def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]: """ Yield possible assignments for the current sequence. @@ -265,12 +285,13 @@ class _TokenSequence: """ base = TokenAssignment.from_ranges(self.seq) + num_addr_tokens = sum(t.end - t.start for t in base.address) + if num_addr_tokens > 50: + return + # Postcode search (postcode-only search is covered in next case) if base.postcode is not None and base.address: - if (base.postcode.start == 0 and self.direction != -1)\ - or (base.postcode.end == query.num_token_slots() and self.direction != 1): - log().comment('postcode search') - yield dataclasses.replace(base, penalty=self.penalty) + yield from self._get_assignments_postcode(base, query.num_token_slots()) # Postcode or country-only search if not base.address: @@ -278,6 +299,9 @@ class _TokenSequence: log().comment('postcode/country search') yield dataclasses.replace(base, penalty=self.penalty) else: + # <postcode>,
<address> should give preference to postcode search + if base.postcode and base.postcode.start == 0: + self.penalty += 0.1 # Use entire first word as name if self.direction != -1: log().comment('first word = name') @@ -301,9 +325,13 @@ class _TokenSequence: first = base.address[0] if (not base.housenumber or first.end >= base.housenumber.start)\ and (not base.qualifier or first.start >= base.qualifier.end): + base_penalty = self.penalty + if (base.housenumber and base.housenumber.start > first.start) \ + or len(query.source) > 1: + base_penalty += 0.25 for i in range(first.start + 1, first.end): name, addr = first.split(i) - penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] + penalty = base_penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] log().comment(f'split first word = name ({i - first.start})') yield dataclasses.replace(base, name=name, penalty=penalty, address=[addr] + base.address[1:]) @@ -313,9 +341,14 @@ class _TokenSequence: last = base.address[-1] if (not base.housenumber or last.start <= base.housenumber.end)\ and (not base.qualifier or last.end <= base.qualifier.start): + base_penalty = self.penalty + if base.housenumber and base.housenumber.start < last.start: + base_penalty += 0.4 + if len(query.source) > 1: + base_penalty += 0.25 for i in range(last.start + 1, last.end): addr, name = last.split(i) - penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] + penalty = base_penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] log().comment(f'split last word = name ({i - last.start})') yield dataclasses.replace(base, name=name, penalty=penalty, address=base.address[:-1] + [addr])