X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8171fe4571a57bf8e5b2a8f676989e973897e2e7..f5e52e748f0a4275e3238e51c47de2ddccfea0ff:/nominatim/tokenizer/sanitizers/split_name_list.py diff --git a/nominatim/tokenizer/sanitizers/split_name_list.py b/nominatim/tokenizer/sanitizers/split_name_list.py index 93651f3e..7d0667b4 100644 --- a/nominatim/tokenizer/sanitizers/split_name_list.py +++ b/nominatim/tokenizer/sanitizers/split_name_list.py @@ -1,17 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ -Name processor that splits name values with multiple values into their components. +Sanitizer that splits lists of names into their components. + +Arguments: + delimiters: Define the set of characters to be used for + splitting the list. (default: ',;') """ -import re +from typing import Callable + +from nominatim.tokenizer.sanitizers.base import ProcessInfo +from nominatim.tokenizer.sanitizers.config import SanitizerConfig -def create(func): +def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]: """ Create a name processing function that splits name values with - multiple values into their components. The optional parameter - 'delimiters' can be used to define the characters that should be used - for splitting. The default is ',;'. + multiple values into their components. """ - regexp = re.compile('[{}]'.format(func.get('delimiters', ',;'))) + regexp = config.get_delimiter() - def _process(obj): + def _process(obj: ProcessInfo) -> None: if not obj.names: return @@ -21,7 +32,7 @@ def create(func): if len(split_names) == 1: new_names.append(name) else: - new_names.extend(name.clone(name=n) for n in split_names) + new_names.extend(name.clone(name=n) for n in split_names if n) obj.names = new_names