X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/a413aae8a3962be4d623844d867604df68a9a211..7f9cb4e68d14b0460982bee64af7b2467c2ecd1e:/nominatim/api/search/token_assignment.py diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py index 747fea6c..33fb7335 100644 --- a/nominatim/api/search/token_assignment.py +++ b/nominatim/api/search/token_assignment.py @@ -257,6 +257,78 @@ class _TokenSequence: return True + def _get_assignments_postcode(self, base: TokenAssignment, + query_len: int) -> Iterator[TokenAssignment]: + """ Yield possible assignments of Postcode searches with an + address component. + """ + assert base.postcode is not None + + if (base.postcode.start == 0 and self.direction != -1)\ + or (base.postcode.end == query_len and self.direction != 1): + log().comment('postcode search') + # <address>,<postcode> should give preference to address search
+ if base.postcode.start == 0: + penalty = self.penalty + self.direction = -1 # name searches are only possible backwards + else: + penalty = self.penalty + 0.1 + self.direction = 1 # name searches are only possible forwards + yield dataclasses.replace(base, penalty=penalty) + + + def _get_assignments_address_forward(self, base: TokenAssignment, + query: qmod.QueryStruct) -> Iterator[TokenAssignment]: + """ Yield possible assignments of address searches with + left-to-right reading. + """ + first = base.address[0] + + log().comment('first word = name') + yield dataclasses.replace(base, penalty=self.penalty, + name=first, address=base.address[1:]) + + if (not base.housenumber or first.end >= base.housenumber.start)\ + and (not base.qualifier or first.start >= base.qualifier.end): + base_penalty = self.penalty + if (base.housenumber and base.housenumber.start > first.start) \ + or len(query.source) > 1: + base_penalty += 0.25 + for i in range(first.start + 1, first.end): + name, addr = first.split(i) + penalty = base_penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] + log().comment(f'split first word = name ({i - first.start})') + yield dataclasses.replace(base, name=name, penalty=penalty, + address=[addr] + base.address[1:]) + + + def _get_assignments_address_backward(self, base: TokenAssignment, + query: qmod.QueryStruct) -> Iterator[TokenAssignment]: + """ Yield possible assignments of address searches with + right-to-left reading. 
+ """ + last = base.address[-1] + + if self.direction == -1 or len(base.address) > 1: + log().comment('last word = name') + yield dataclasses.replace(base, penalty=self.penalty, + name=last, address=base.address[:-1]) + + if (not base.housenumber or last.start <= base.housenumber.end)\ + and (not base.qualifier or last.end <= base.qualifier.start): + base_penalty = self.penalty + if base.housenumber and base.housenumber.start < last.start: + base_penalty += 0.4 + if len(query.source) > 1: + base_penalty += 0.25 + for i in range(last.start + 1, last.end): + addr, name = last.split(i) + penalty = base_penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] + log().comment(f'split last word = name ({i - last.start})') + yield dataclasses.replace(base, name=name, penalty=penalty, + address=base.address[:-1] + [addr]) + + def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]: """ Yield possible assignments for the current sequence. @@ -265,12 +337,13 @@ class _TokenSequence: """ base = TokenAssignment.from_ranges(self.seq) + num_addr_tokens = sum(t.end - t.start for t in base.address) + if num_addr_tokens > 50: + return + # Postcode search (postcode-only search is covered in next case) if base.postcode is not None and base.address: - if (base.postcode.start == 0 and self.direction != -1)\ - or (base.postcode.end == query.num_token_slots() and self.direction != 1): - log().comment('postcode search') - yield dataclasses.replace(base, penalty=self.penalty) + yield from self._get_assignments_postcode(base, query.num_token_slots()) # Postcode or country-only search if not base.address: @@ -278,49 +351,22 @@ class _TokenSequence: log().comment('postcode/country search') yield dataclasses.replace(base, penalty=self.penalty) else: - # Use entire first word as name + # <postcode>,<address> should give preference to postcode search
+ if base.postcode and base.postcode.start == 0: + self.penalty += 0.1 + + # Left-to-right reading of the address if self.direction != -1: - log().comment('first word = name') - yield dataclasses.replace(base, name=base.address[0], - penalty=self.penalty, - address=base.address[1:]) - - # Use entire last word as name - if self.direction == -1 or (self.direction == 0 and len(base.address) > 1): - log().comment('last word = name') - yield dataclasses.replace(base, name=base.address[-1], - penalty=self.penalty, - address=base.address[:-1]) + yield from self._get_assignments_address_forward(base, query) + + # Right-to-left reading of the address + if self.direction != 1: + yield from self._get_assignments_address_backward(base, query) # variant for special housenumber searches if base.housenumber: yield dataclasses.replace(base, penalty=self.penalty) - # Use beginning of first word as name - if self.direction != -1: - first = base.address[0] - if (not base.housenumber or first.end >= base.housenumber.start)\ - and (not base.qualifier or first.start >= base.qualifier.end): - for i in range(first.start + 1, first.end): - name, addr = first.split(i) - penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] - log().comment(f'split first word = name ({i - first.start})') - yield dataclasses.replace(base, name=name, penalty=penalty, - address=[addr] + base.address[1:]) - - # Use end of last word as name - if self.direction != 1: - last = base.address[-1] - if (not base.housenumber or last.start <= base.housenumber.end)\ - and (not base.qualifier or last.end <= base.qualifier.start): - for i in range(last.start + 1, last.end): - addr, name = last.split(i) - penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] - log().comment(f'split last word = name ({i - last.start})') - yield dataclasses.replace(base, name=name, penalty=penalty, - address=base.address[:-1] + [addr]) - - def yield_token_assignments(query: 
qmod.QueryStruct) -> Iterator[TokenAssignment]: """ Return possible word type assignments to word positions.