X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/4a00a3c0f5fc3a522fab66808f83ddfe144ccb92..44da684d1dfc57c085f6544d615374e97d8e96db:/nominatim/api/search/token_assignment.py diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py index c05c271a..3f0e737b 100644 --- a/nominatim/api/search/token_assignment.py +++ b/nominatim/api/search/token_assignment.py @@ -253,10 +253,103 @@ class _TokenSequence: priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL) if not self._adapt_penalty_from_priors(priors, 1): return False + if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq): + self.penalty += 1.0 return True + def _get_assignments_postcode(self, base: TokenAssignment, + query_len: int) -> Iterator[TokenAssignment]: + """ Yield possible assignments of Postcode searches with an + address component. + """ + assert base.postcode is not None + + if (base.postcode.start == 0 and self.direction != -1)\ + or (base.postcode.end == query_len and self.direction != 1): + log().comment('postcode search') + #
, should give preference to address search + if base.postcode.start == 0: + penalty = self.penalty + self.direction = -1 # name searches are only possbile backwards + else: + penalty = self.penalty + 0.1 + self.direction = 1 # name searches are only possbile forwards + yield dataclasses.replace(base, penalty=penalty) + + + def _get_assignments_address_forward(self, base: TokenAssignment, + query: qmod.QueryStruct) -> Iterator[TokenAssignment]: + """ Yield possible assignments of address searches with + left-to-right reading. + """ + first = base.address[0] + + log().comment('first word = name') + yield dataclasses.replace(base, penalty=self.penalty, + name=first, address=base.address[1:]) + + # To paraphrase: + # * if another name term comes after the first one and before the + # housenumber + # * a qualifier comes after the name + # * the containing phrase is strictly typed + if (base.housenumber and first.end < base.housenumber.start)\ + or (base.qualifier and base.qualifier > first)\ + or (query.nodes[first.start].ptype != qmod.PhraseType.NONE): + return + + penalty = self.penalty + + # Penalty for: + # * , , , ... + # * queries that are comma-separated + if (base.housenumber and base.housenumber > first) or len(query.source) > 1: + penalty += 0.25 + + for i in range(first.start + 1, first.end): + name, addr = first.split(i) + log().comment(f'split first word = name ({i - first.start})') + yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:], + penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]) + + + def _get_assignments_address_backward(self, base: TokenAssignment, + query: qmod.QueryStruct) -> Iterator[TokenAssignment]: + """ Yield possible assignments of address searches with + right-to-left reading. + """ + last = base.address[-1] + + if self.direction == -1 or len(base.address) > 1: + log().comment('last word = name') + yield dataclasses.replace(base, penalty=self.penalty, + name=last, address=base.address[:-1]) + + # To paraphrase: + # * if another name term comes before the last one and after the + # housenumber + # * a qualifier comes before the name + # * the containing phrase is strictly typed + if (base.housenumber and last.start > base.housenumber.end)\ + or (base.qualifier and base.qualifier < last)\ + or (query.nodes[last.start].ptype != qmod.PhraseType.NONE): + return + + penalty = self.penalty + if base.housenumber and base.housenumber < last: + penalty += 0.4 + if len(query.source) > 1: + penalty += 0.25 + + for i in range(last.start + 1, last.end): + addr, name = last.split(i) + log().comment(f'split last word = name ({i - last.start})') + yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr], + penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]) + + def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]: """ Yield possible assignments for the current sequence. @@ -265,17 +358,13 @@ class _TokenSequence: """ base = TokenAssignment.from_ranges(self.seq) + num_addr_tokens = sum(t.end - t.start for t in base.address) + if num_addr_tokens > 50: + return + # Postcode search (postcode-only search is covered in next case) if base.postcode is not None and base.address: - if (base.postcode.start == 0 and self.direction != -1)\ - or (base.postcode.end == query.num_token_slots() and self.direction != 1): - log().comment('postcode search') - #
, should give preference to address search - if base.postcode.start == 0: - penalty = self.penalty - else: - penalty = self.penalty + 0.1 - yield dataclasses.replace(base, penalty=penalty) + yield from self._get_assignments_postcode(base, query.num_token_slots()) # Postcode or country-only search if not base.address: @@ -286,58 +375,19 @@ class _TokenSequence: # ,
should give preference to postcode search if base.postcode and base.postcode.start == 0: self.penalty += 0.1 - # Use entire first word as name + + # Right-to-left reading of the address if self.direction != -1: - log().comment('first word = name') - yield dataclasses.replace(base, name=base.address[0], - penalty=self.penalty, - address=base.address[1:]) - - # Use entire last word as name - if self.direction == -1 or (self.direction == 0 and len(base.address) > 1): - log().comment('last word = name') - yield dataclasses.replace(base, name=base.address[-1], - penalty=self.penalty, - address=base.address[:-1]) + yield from self._get_assignments_address_forward(base, query) + + # Left-to-right reading of the address + if self.direction != 1: + yield from self._get_assignments_address_backward(base, query) # variant for special housenumber searches if base.housenumber: yield dataclasses.replace(base, penalty=self.penalty) - # Use beginning of first word as name - if self.direction != -1: - first = base.address[0] - if (not base.housenumber or first.end >= base.housenumber.start)\ - and (not base.qualifier or first.start >= base.qualifier.end): - base_penalty = self.penalty - if (base.housenumber and base.housenumber.start > first.start) \ - or len(query.source) > 1: - base_penalty += 0.25 - for i in range(first.start + 1, first.end): - name, addr = first.split(i) - penalty = base_penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] - log().comment(f'split first word = name ({i - first.start})') - yield dataclasses.replace(base, name=name, penalty=penalty, - address=[addr] + base.address[1:]) - - # Use end of last word as name - if self.direction != 1: - last = base.address[-1] - if (not base.housenumber or last.start <= base.housenumber.end)\ - and (not base.qualifier or last.end <= base.qualifier.start): - base_penalty = self.penalty - if base.housenumber and base.housenumber.start < last.start: - base_penalty += 0.4 - if len(query.source) > 1: - base_penalty += 0.25 - for i in range(last.start + 1, last.end): - addr, name = last.split(i) - penalty = base_penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] - log().comment(f'split last word = name ({i - last.start})') - yield dataclasses.replace(base, name=name, penalty=penalty, - address=base.address[:-1] + [addr]) - - def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]: """ Return possible word type assignments to word positions.