X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/56f9535aa9cf3f9f500471603e5876cd3e0ddb0b..df15f13c628a3e15dee4bcad44e97b2cacb40b9f:/nominatim/api/search/token_assignment.py diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py index 3f0e737b..ca907b79 100644 --- a/nominatim/api/search/token_assignment.py +++ b/nominatim/api/search/token_assignment.py @@ -46,7 +46,7 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes housenumber: Optional[qmod.TokenRange] = None postcode: Optional[qmod.TokenRange] = None country: Optional[qmod.TokenRange] = None - category: Optional[qmod.TokenRange] = None + near_item: Optional[qmod.TokenRange] = None qualifier: Optional[qmod.TokenRange] = None @@ -64,15 +64,15 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes out.postcode = token.trange elif token.ttype == qmod.TokenType.COUNTRY: out.country = token.trange - elif token.ttype == qmod.TokenType.CATEGORY: - out.category = token.trange + elif token.ttype == qmod.TokenType.NEAR_ITEM: + out.near_item = token.trange elif token.ttype == qmod.TokenType.QUALIFIER: out.qualifier = token.trange return out class _TokenSequence: - """ Working state used to put together the token assignements. + """ Working state used to put together the token assignments. Represents an intermediate state while traversing the tokenized query. @@ -109,7 +109,7 @@ class _TokenSequence: """ # Country and category must be the final term for left-to-right return len(self.seq) > 1 and \ - self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY) + self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM) def appendable(self, ttype: qmod.TokenType) -> Optional[int]: @@ -132,6 +132,11 @@ class _TokenSequence: # Name tokens are always acceptable and don't change direction if ttype == qmod.TokenType.PARTIAL: + # qualifiers cannot appear in the middle of the query. They need + # to be near the next phrase. + if self.direction == -1 \ + and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]): + return None return self.direction # Other tokens may only appear once @@ -165,22 +170,22 @@ class _TokenSequence: if ttype == qmod.TokenType.COUNTRY: return None if self.direction == -1 else 1 - if ttype == qmod.TokenType.CATEGORY: + if ttype == qmod.TokenType.NEAR_ITEM: return self.direction if ttype == qmod.TokenType.QUALIFIER: if self.direction == 1: if (len(self.seq) == 1 - and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \ + and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \ or (len(self.seq) == 2 - and self.seq[0].ttype == qmod.TokenType.CATEGORY + and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM and self.seq[1].ttype == qmod.TokenType.PARTIAL): return 1 return None if self.direction == -1: return -1 - tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq + tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq if len(tempseq) == 0: return 1 if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER: @@ -233,10 +238,10 @@ class _TokenSequence: def recheck_sequence(self) -> bool: """ Check that the sequence is a fully valid token assignment - and addapt direction and penalties further if necessary. + and adapt direction and penalties further if necessary. This function catches some impossible assignments that need - forward context and can therefore not be exluded when building + forward context and can therefore not be excluded when building the assignment. """ # housenumbers may not be further than 2 words from the beginning. @@ -253,7 +258,7 @@ class _TokenSequence: priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL) if not self._adapt_penalty_from_priors(priors, 1): return False - if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq): + if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq): self.penalty += 1.0 return True @@ -272,10 +277,10 @@ class _TokenSequence: #
,