X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/927d2cc824e0437dd2ea6abc4ef47c9b3ed3d0aa..9c48726691b42a75ed658f83e44e899d6db54a40:/nominatim/api/search/token_assignment.py diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py index 0ae2cd43..ca907b79 100644 --- a/nominatim/api/search/token_assignment.py +++ b/nominatim/api/search/token_assignment.py @@ -46,7 +46,7 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes housenumber: Optional[qmod.TokenRange] = None postcode: Optional[qmod.TokenRange] = None country: Optional[qmod.TokenRange] = None - category: Optional[qmod.TokenRange] = None + near_item: Optional[qmod.TokenRange] = None qualifier: Optional[qmod.TokenRange] = None @@ -64,15 +64,15 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes out.postcode = token.trange elif token.ttype == qmod.TokenType.COUNTRY: out.country = token.trange - elif token.ttype == qmod.TokenType.CATEGORY: - out.category = token.trange + elif token.ttype == qmod.TokenType.NEAR_ITEM: + out.near_item = token.trange elif token.ttype == qmod.TokenType.QUALIFIER: out.qualifier = token.trange return out class _TokenSequence: - """ Working state used to put together the token assignements. + """ Working state used to put together the token assignments. Represents an intermediate state while traversing the tokenized query. @@ -109,7 +109,7 @@ class _TokenSequence: """ # Country and category must be the final term for left-to-right return len(self.seq) > 1 and \ - self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY) + self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM) def appendable(self, ttype: qmod.TokenType) -> Optional[int]: @@ -132,6 +132,11 @@ class _TokenSequence: # Name tokens are always acceptable and don't change direction if ttype == qmod.TokenType.PARTIAL: + # qualifiers cannot appear in the middle of the query. 
They need + # to be near the next phrase. + if self.direction == -1 \ + and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]): + return None return self.direction # Other tokens may only appear once @@ -165,22 +170,22 @@ class _TokenSequence: if ttype == qmod.TokenType.COUNTRY: return None if self.direction == -1 else 1 - if ttype == qmod.TokenType.CATEGORY: + if ttype == qmod.TokenType.NEAR_ITEM: return self.direction if ttype == qmod.TokenType.QUALIFIER: if self.direction == 1: if (len(self.seq) == 1 - and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \ + and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \ or (len(self.seq) == 2 - and self.seq[0].ttype == qmod.TokenType.CATEGORY + and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM and self.seq[1].ttype == qmod.TokenType.PARTIAL): return 1 return None if self.direction == -1: return -1 - tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq + tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq if len(tempseq) == 0: return 1 if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER: @@ -233,10 +238,10 @@ class _TokenSequence: def recheck_sequence(self) -> bool: """ Check that the sequence is a fully valid token assignment - and addapt direction and penalties further if necessary. + and adapt direction and penalties further if necessary. This function catches some impossible assignments that need - forward context and can therefore not be exluded when building + forward context and can therefore not be excluded when building the assignment. """ # housenumbers may not be further than 2 words from the beginning. 
@@ -253,6 +258,8 @@ class _TokenSequence: priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL) if not self._adapt_penalty_from_priors(priors, 1): return False + if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq): + self.penalty += 1.0 return True @@ -270,10 +277,10 @@ class _TokenSequence: # <address>,<postcode> should give preference to address search if base.postcode.start == 0: penalty = self.penalty - self.direction = -1 # name searches are only possbile backwards + self.direction = -1 # name searches are only possible backwards else: penalty = self.penalty + 0.1 - self.direction = 1 # name searches are only possbile forwards + self.direction = 1 # name searches are only possible forwards yield dataclasses.replace(base, penalty=penalty) @@ -366,7 +373,7 @@ class _TokenSequence: # Postcode or country-only search if not base.address: - if not base.housenumber and (base.postcode or base.country or base.category): + if not base.housenumber and (base.postcode or base.country or base.near_item): log().comment('postcode/country search') yield dataclasses.replace(base, penalty=self.penalty) else: @@ -383,7 +390,7 @@ class _TokenSequence: yield from self._get_assignments_address_backward(base, query) # variant for special housenumber searches - if base.housenumber: + if base.housenumber and not base.qualifier: yield dataclasses.replace(base, penalty=self.penalty)