X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d48ea4f22cd60206613c193dd6aa2dd5e92029bd..d6960c72e48cf83b8f5806ea751609b02cf4fd3d:/nominatim/api/search/token_assignment.py?ds=sidebyside diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py index edddd100..3f0e737b 100644 --- a/nominatim/api/search/token_assignment.py +++ b/nominatim/api/search/token_assignment.py @@ -253,6 +253,8 @@ class _TokenSequence: priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL) if not self._adapt_penalty_from_priors(priors, 1): return False + if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq): + self.penalty += 1.0 return True @@ -273,10 +275,81 @@ class _TokenSequence: self.direction = -1 # name searches are only possbile backwards else: penalty = self.penalty + 0.1 - self.direction = 1 # name searches are only possbile forewards + self.direction = 1 # name searches are only possbile forwards yield dataclasses.replace(base, penalty=penalty) + def _get_assignments_address_forward(self, base: TokenAssignment, + query: qmod.QueryStruct) -> Iterator[TokenAssignment]: + """ Yield possible assignments of address searches with + left-to-right reading. + """ + first = base.address[0] + + log().comment('first word = name') + yield dataclasses.replace(base, penalty=self.penalty, + name=first, address=base.address[1:]) + + # To paraphrase: + # * if another name term comes after the first one and before the + # housenumber + # * a qualifier comes after the name + # * the containing phrase is strictly typed + if (base.housenumber and first.end < base.housenumber.start)\ + or (base.qualifier and base.qualifier > first)\ + or (query.nodes[first.start].ptype != qmod.PhraseType.NONE): + return + + penalty = self.penalty + + # Penalty for: + # * , , , ... + # * queries that are comma-separated + if (base.housenumber and base.housenumber > first) or len(query.source) > 1: + penalty += 0.25 + + for i in range(first.start + 1, first.end): + name, addr = first.split(i) + log().comment(f'split first word = name ({i - first.start})') + yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:], + penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]) + + + def _get_assignments_address_backward(self, base: TokenAssignment, + query: qmod.QueryStruct) -> Iterator[TokenAssignment]: + """ Yield possible assignments of address searches with + right-to-left reading. + """ + last = base.address[-1] + + if self.direction == -1 or len(base.address) > 1: + log().comment('last word = name') + yield dataclasses.replace(base, penalty=self.penalty, + name=last, address=base.address[:-1]) + + # To paraphrase: + # * if another name term comes before the last one and after the + # housenumber + # * a qualifier comes before the name + # * the containing phrase is strictly typed + if (base.housenumber and last.start > base.housenumber.end)\ + or (base.qualifier and base.qualifier < last)\ + or (query.nodes[last.start].ptype != qmod.PhraseType.NONE): + return + + penalty = self.penalty + if base.housenumber and base.housenumber < last: + penalty += 0.4 + if len(query.source) > 1: + penalty += 0.25 + + for i in range(last.start + 1, last.end): + addr, name = last.split(i) + log().comment(f'split last word = name ({i - last.start})') + yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr], + penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]) + + def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]: """ Yield possible assignments for the current sequence. @@ -302,58 +375,19 @@ class _TokenSequence: # ,
should give preference to postcode search if base.postcode and base.postcode.start == 0: self.penalty += 0.1 - # Use entire first word as name + + # Right-to-left reading of the address if self.direction != -1: - log().comment('first word = name') - yield dataclasses.replace(base, name=base.address[0], - penalty=self.penalty, - address=base.address[1:]) - - # Use entire last word as name - if self.direction == -1 or (self.direction == 0 and len(base.address) > 1): - log().comment('last word = name') - yield dataclasses.replace(base, name=base.address[-1], - penalty=self.penalty, - address=base.address[:-1]) + yield from self._get_assignments_address_forward(base, query) + + # Left-to-right reading of the address + if self.direction != 1: + yield from self._get_assignments_address_backward(base, query) # variant for special housenumber searches if base.housenumber: yield dataclasses.replace(base, penalty=self.penalty) - # Use beginning of first word as name - if self.direction != -1: - first = base.address[0] - if (not base.housenumber or first.end >= base.housenumber.start)\ - and (not base.qualifier or first.start >= base.qualifier.end): - base_penalty = self.penalty - if (base.housenumber and base.housenumber.start > first.start) \ - or len(query.source) > 1: - base_penalty += 0.25 - for i in range(first.start + 1, first.end): - name, addr = first.split(i) - penalty = base_penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] - log().comment(f'split first word = name ({i - first.start})') - yield dataclasses.replace(base, name=name, penalty=penalty, - address=[addr] + base.address[1:]) - - # Use end of last word as name - if self.direction != 1: - last = base.address[-1] - if (not base.housenumber or last.start <= base.housenumber.end)\ - and (not base.qualifier or last.end <= base.qualifier.start): - base_penalty = self.penalty - if base.housenumber and base.housenumber.start < last.start: - base_penalty += 0.4 - if len(query.source) > 1: - base_penalty += 0.25 - for i in range(last.start + 1, last.end): - addr, name = last.split(i) - penalty = base_penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype] - log().comment(f'split last word = name ({i - last.start})') - yield dataclasses.replace(base, name=name, penalty=penalty, - address=base.address[:-1] + [addr]) - - def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]: """ Return possible word type assignments to word positions.