# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
This preprocessor replaces values in a given input based on pre-defined regex rules.

Arguments:
    pattern: Regex pattern to be applied on the input
    replace: The string that each match is to be replaced with
"""
from typing import List
import re
class _GenericPreprocessing:
+ """Perform replacements to input phrases using custom regex patterns."""
def __init__(self, config: QueryConfig) -> None:
+ """Initialise the _GenericPreprocessing class with patterns from the ICU config file."""
self.config = config
match_patterns = self.config.get('replacements', 'Key not found')
]
def split_phrase(self, phrase: Phrase) -> Phrase:
    """Apply every configured replacement rule to the text of a phrase.

    The entries of ``self.compiled_patterns`` are processed in list
    order. For each entry, element 0 is used as the pattern object and
    element 1 as the replacement, and every rule operates on the text
    left behind by the previous one. The phrase is modified in place
    and the same object is returned.
    """
    for rule in self.compiled_patterns:
        regex, substitute = rule[0], rule[1]
        phrase.text = regex.sub(substitute, phrase.text)
    return phrase
def __call__(self, phrases: List[Phrase]) -> List[Phrase]:
    """Run the replacements over all phrases and drop the emptied ones.

    Each phrase is passed through split_phrase(). Phrases whose text is
    empty or consists only of whitespace afterwards are left out, so
    the returned list may be empty.
    """
    kept = []
    for phrase in phrases:
        processed = self.split_phrase(phrase)
        # Only keep phrases that still carry non-whitespace text.
        if processed.text.strip():
            kept.append(processed)
    return kept
def create(config: QueryConfig) -> QueryProcessingFunc:
    """ Build the generic preprocessing callable for the given configuration."""
    preprocessor = _GenericPreprocessing(config)
    return preprocessor
query = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in inp]
out = run_preprocessor_on(query)
- expected_out = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp]
-
- assert out == expected_out, f"Expected {expected_out}, but got {out}"
+ assert out == [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp]