From: TuringVerified Date: Mon, 31 Mar 2025 19:57:45 +0000 (+0530) Subject: Remove unnecessary assert statement, Fix regex_replace docstring and simplify regex_r... X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/2eeec460400faf84d9f9ec63bb1bb7dbf9b85438?ds=inline Remove unnecessary assert statement, Fix regex_replace docstring and simplify regex_replace --- diff --git a/docs/customize/Tokenizers.md b/docs/customize/Tokenizers.md index 2430be7e..23db34c9 100644 --- a/docs/customize/Tokenizers.md +++ b/docs/customize/Tokenizers.md @@ -67,8 +67,9 @@ Here is an example configuration file: ``` yaml query-preprocessing: + - step: split_japanese_phrases - step: regex_replace - replacements: + replacements: - pattern: https?://[^\s]* # Filter URLs starting with http or https replace: '' - step: normalize @@ -111,6 +112,8 @@ The following is a list of preprocessors that are shipped with Nominatim. heading_level: 6 docstring_section_style: spacy +##### regex-replace + ::: nominatim_api.query_preprocessing.regex_replace options: members: False diff --git a/src/nominatim_api/query_preprocessing/regex_replace.py b/src/nominatim_api/query_preprocessing/regex_replace.py index b711d54b..b3a02495 100644 --- a/src/nominatim_api/query_preprocessing/regex_replace.py +++ b/src/nominatim_api/query_preprocessing/regex_replace.py @@ -5,7 +5,11 @@ # Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ -This file replaces values based on pre-defined regex rules: +This preprocessor replaces values in a given input based on pre-defined regex rules. + +Arguments: + pattern: Regex pattern to be applied on the input + replace: The string that it is to be replaced with """ from typing import List import re @@ -16,8 +20,10 @@ from ..search.query import Phrase class _GenericPreprocessing: + """Perform replacements to input phrases using custom regex patterns.""" def __init__(self, config: QueryConfig) -> None: + """Initialise the _GenericPreprocessing class with patterns from the ICU config file.""" self.config = config match_patterns = self.config.get('replacements', 'Key not found') @@ -26,22 +32,21 @@ class _GenericPreprocessing: ] def split_phrase(self, phrase: Phrase) -> Phrase: - """ - This function performs replacements on the given text using regex patterns. - """ + """This function performs replacements on the given text using regex patterns.""" for item in self.compiled_patterns: phrase.text = item[0].sub(item[1], phrase.text) return phrase def __call__(self, phrases: List[Phrase]) -> List[Phrase]: - """Apply regex replacements to the given addresses. + """ + Return the final Phrase list. + Returns an empty list if there is nothing left after split_phrase. """ result = [p for p in map(self.split_phrase, phrases) if p.text.strip()] - return result if result else [] + return result def create(config: QueryConfig) -> QueryProcessingFunc: - """ Create a function for generic preprocessing. - """ + """ Create a function for generic preprocessing.""" return _GenericPreprocessing(config) diff --git a/test/python/api/query_processing/test_regex_replace.py b/test/python/api/query_processing/test_regex_replace.py index 288ac23e..ef759ba1 100644 --- a/test/python/api/query_processing/test_regex_replace.py +++ b/test/python/api/query_processing/test_regex_replace.py @@ -46,6 +46,4 @@ def test_split_phrases(inp, outp): query = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in inp] out = run_preprocessor_on(query) - expected_out = [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp] - - assert out == expected_out, f"Expected {expected_out}, but got {out}" + assert out == [qmod.Phrase(qmod.PHRASE_ANY, text) for text in outp]