X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/afb89f9c7abab5aad234889a4e4426ac57a5b56c..6b0d58d9fdbd2e488f3dc408c1b7d8837710f6e6:/src/nominatim_api/search/postcode_parser.py diff --git a/src/nominatim_api/search/postcode_parser.py b/src/nominatim_api/search/postcode_parser.py index 1148d3c3..bb3ef1a4 100644 --- a/src/nominatim_api/search/postcode_parser.py +++ b/src/nominatim_api/search/postcode_parser.py @@ -55,32 +55,49 @@ class PostcodeParser: [start node id, end node id, postcode token] """ nodes = query.nodes - outcodes = set() + outcodes: Set[Tuple[int, int, str]] = set() for i in range(query.num_token_slots()): - if nodes[i].btype in '<,: ' and nodes[i + 1].btype != '`': - word = nodes[i + 1].term_normalized + nodes[i + 1].btype - if word[-1] in ' -' and nodes[i + 2].btype != '`': - word += nodes[i + 2].term_normalized + nodes[i + 2].btype - if word[-1] in ' -' and nodes[i + 3].btype != '`': - word += nodes[i + 3].term_normalized + nodes[i + 3].btype - - # Use global pattern to check for presence of any postcode. - m = self.global_pattern.fullmatch(word) - if m: - # If there was a match, check against each pattern separately - # because multiple patterns might be machting at the end. 
- cc = m.group('cc') - pc_word = m.group('pc') - cc_spaces = len(m.group('space') or '') - for pattern, info in self.local_patterns: - lm = pattern.match(pc_word) - if lm: - trange = (i, i + cc_spaces + sum(c in ' ,-:>' for c in lm.group(0))) - for out, out_ccs in info: - if cc is None or cc in out_ccs: - if out: - outcodes.add((*trange, lm.expand(out).upper())) - else: - outcodes.add((*trange, lm.group(0)[:-1].upper())) + if nodes[i].btype in '<,: ' and nodes[i + 1].btype != '`' \ + and (i == 0 or nodes[i - 1].ptype != qmod.PHRASE_POSTCODE): + if nodes[i].ptype == qmod.PHRASE_ANY: + word = nodes[i + 1].term_normalized + nodes[i + 1].btype + if word[-1] in ' -' and nodes[i + 2].btype != '`' \ + and nodes[i + 1].ptype == qmod.PHRASE_ANY: + word += nodes[i + 2].term_normalized + nodes[i + 2].btype + if word[-1] in ' -' and nodes[i + 3].btype != '`' \ + and nodes[i + 2].ptype == qmod.PHRASE_ANY: + word += nodes[i + 3].term_normalized + nodes[i + 3].btype + + self._match_word(word, i, False, outcodes) + elif nodes[i].ptype == qmod.PHRASE_POSTCODE: + word = nodes[i + 1].term_normalized + nodes[i + 1].btype + for j in range(i + 1, query.num_token_slots()): + if nodes[j].ptype != qmod.PHRASE_POSTCODE: + break + word += nodes[j + 1].term_normalized + nodes[j + 1].btype + + self._match_word(word, i, True, outcodes) + return outcodes + + def _match_word(self, word: str, pos: int, fullmatch: bool, + outcodes: Set[Tuple[int, int, str]]) -> None: + # Use global pattern to check for presence of any postcode. + m = self.global_pattern.fullmatch(word) + if m: + # If there was a match, check against each pattern separately + # because multiple patterns might be matching at the end. 
+ cc = m.group('cc') + pc_word = m.group('pc') + cc_spaces = len(m.group('space') or '') + for pattern, info in self.local_patterns: + lm = pattern.fullmatch(pc_word) if fullmatch else pattern.match(pc_word) + if lm: + trange = (pos, pos + cc_spaces + sum(c in ' ,-:>' for c in lm.group(0))) + for out, out_ccs in info: + if cc is None or cc in out_ccs: + if out: + outcodes.add((*trange, lm.expand(out).upper())) + else: + outcodes.add((*trange, lm.group(0)[:-1].upper()))