def create_kind_filter(config, default=None):
    """ Create a filter function for the name kind from the 'filter-kind'
        config parameter. The filter function takes a name item and returns
        True when the item passes the filter and False otherwise.

        `config` is queried with `get('filter-kind', default)`; `default`
        is the value used when the parameter is not set.

        If the parameter is empty, the filter lets all items pass. If the
        parameter is a string, it is interpreted as a single regular
        expression that must match the full kind string. If the parameter
        is a list then any of the regular expressions in the list must
        match to pass.
    """
    filters = config.get('filter-kind', default)

    if not filters:
        return lambda _: True

    # A lone string is handled as a one-element list of expressions, so both
    # forms share one code path and the filter always yields a real bool
    # instead of leaking a match object (or None) to the caller.
    if isinstance(filters, str):
        filters = [filters]

    regexes = [re.compile(regex) for regex in filters]
    return lambda name: any(regex.fullmatch(name.kind) for regex in regexes)