X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/4665ea3e773b5620f5b65cf2396a91192cc8cda0..6cf1287c4e7f7f7ef6af8489ccb54a213bf7571a:/src/nominatim_api/query_preprocessing/regex_replace.py?ds=inline diff --git a/src/nominatim_api/query_preprocessing/regex_replace.py b/src/nominatim_api/query_preprocessing/regex_replace.py index 883fa991..b3a02495 100644 --- a/src/nominatim_api/query_preprocessing/regex_replace.py +++ b/src/nominatim_api/query_preprocessing/regex_replace.py @@ -5,7 +5,11 @@ # Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ -This file replaces values based on pre-defined regex rules: +This preprocessor replaces values in a given input based on pre-defined regex rules. + +Arguments: + pattern: Regex pattern to be applied on the input + replace: The string that it is to be replaced with """ from typing import List import re @@ -16,31 +20,33 @@ from ..search.query import Phrase class _GenericPreprocessing: + """Perform replacements to input phrases using custom regex patterns.""" def __init__(self, config: QueryConfig) -> None: + """Initialise the _GenericPreprocessing class with patterns from the ICU config file.""" self.config = config - def split_phrase(self, phrase: Phrase) -> Phrase: - """ - This function performs replacements on the given text using regex patterns. - """ - - if phrase.text is None: - return phrase - match_patterns = self.config.get('replacements', 'Key not found') - for item in match_patterns: - phrase.text = re.sub(item['pattern'], item['replace'], phrase.text) + self.compiled_patterns = [ + (re.compile(item['pattern']), item['replace']) for item in match_patterns + ] + + def split_phrase(self, phrase: Phrase) -> Phrase: + """This function performs replacements on the given text using regex patterns.""" + for item in self.compiled_patterns: + phrase.text = item[0].sub(item[1], phrase.text) return phrase def __call__(self, phrases: List[Phrase]) -> List[Phrase]: - """Apply regex replacements to the given addresses. """ - return [self.split_phrase(p) for p in phrases] + Return the final Phrase list. + Returns an empty list if there is nothing left after split_phrase. + """ + result = [p for p in map(self.split_phrase, phrases) if p.text.strip()] + return result def create(config: QueryConfig) -> QueryProcessingFunc: - """ Create a function for generic preprocessing. - """ + """ Create a function for generic preprocessing.""" return _GenericPreprocessing(config)