From: Sarah Hoffmann Date: Fri, 29 Jul 2022 09:39:55 +0000 (+0200) Subject: move PlaceName into the generic data module X-Git-Tag: v4.1.0~4^2~3 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/34d27ed45cac67fd155f6997d82fa6a5d8946ce9 move PlaceName into the generic data module --- diff --git a/docs/develop/ICU-Tokenizer-Modules.md b/docs/develop/ICU-Tokenizer-Modules.md index e4af65ed..51b189f1 100644 --- a/docs/develop/ICU-Tokenizer-Modules.md +++ b/docs/develop/ICU-Tokenizer-Modules.md @@ -77,7 +77,7 @@ adding extra attributes) or completely replace the list with a different one. #### PlaceName - extended naming information -::: nominatim.tokenizer.sanitizers.base.PlaceName +::: nominatim.data.place_name.PlaceName rendering: show_source: no heading_level: 6 @@ -94,7 +94,7 @@ functions: heading_level: 6 -::: nominatim.tokenizer.token_analysis.base.Analyser +::: nominatim.tokenizer.token_analysis.base.Analyzer rendering: show_source: no heading_level: 6 diff --git a/nominatim/data/place_name.py b/nominatim/data/place_name.py new file mode 100644 index 00000000..4dfaf819 --- /dev/null +++ b/nominatim/data/place_name.py @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Data class for a single name of a place. +""" +from typing import Optional, Dict, Mapping + +class PlaceName: + """ Each name and address part of a place is encapsulated in an object of + this class. It saves not only the name proper but also describes the + kind of name with two properties: + + * `kind` describes the name of the OSM key used without any suffixes + (i.e. the part after the colon removed) + * `suffix` contains the suffix of the OSM tag, if any. The suffix + is the part of the key after the first colon. + + In addition to that, a name may have arbitrary additional attributes. 
+ How attributes are used depends on the sanitizers and token analysers. + The exception is the 'analyzer' attribute. This attribute determines + which token analysis module will be used to finalize the treatment of + names. + """ + + def __init__(self, name: str, kind: str, suffix: Optional[str]): + self.name = name + self.kind = kind + self.suffix = suffix + self.attr: Dict[str, str] = {} + + + def __repr__(self) -> str: + return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')" + + + def clone(self, name: Optional[str] = None, + kind: Optional[str] = None, + suffix: Optional[str] = None, + attr: Optional[Mapping[str, str]] = None) -> 'PlaceName': + """ Create a deep copy of the place name, optionally with the + given parameters replaced. In the attribute list only the given + keys are updated. The list is not replaced completely. + In particular, the function cannot be used to remove an + attribute from a place name. + """ + newobj = PlaceName(name or self.name, + kind or self.kind, + suffix or self.suffix) + + newobj.attr.update(self.attr) + if attr: + newobj.attr.update(attr) + + return newobj + + + def set_attr(self, key: str, value: str) -> None: + """ Add the given property to the name. If the property was already + set, then the value is overwritten. + """ + self.attr[key] = value + + + def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]: + """ Return the given property or the value of 'default' if it + is not set. + """ + return self.attr.get(key, default) + + + def has_attr(self, key: str) -> bool: + """ Check if the given attribute is set. 
+ """ + return key in self.attr diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index 83013755..98c80376 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -23,7 +23,7 @@ from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.data.place_info import PlaceInfo from nominatim.tokenizer.icu_rule_loader import ICURuleLoader from nominatim.tokenizer.place_sanitizer import PlaceSanitizer -from nominatim.tokenizer.sanitizers.base import PlaceName +from nominatim.data.place_name import PlaceName from nominatim.tokenizer.icu_token_analysis import ICUTokenAnalysis from nominatim.tokenizer.base import AbstractAnalyzer, AbstractTokenizer diff --git a/nominatim/tokenizer/place_sanitizer.py b/nominatim/tokenizer/place_sanitizer.py index c7dfd1ba..2f76fe34 100644 --- a/nominatim/tokenizer/place_sanitizer.py +++ b/nominatim/tokenizer/place_sanitizer.py @@ -13,7 +13,8 @@ from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple from nominatim.errors import UsageError from nominatim.config import Configuration from nominatim.tokenizer.sanitizers.config import SanitizerConfig -from nominatim.tokenizer.sanitizers.base import SanitizerHandler, ProcessInfo, PlaceName +from nominatim.tokenizer.sanitizers.base import SanitizerHandler, ProcessInfo +from nominatim.data.place_name import PlaceName from nominatim.data.place_info import PlaceInfo diff --git a/nominatim/tokenizer/sanitizers/base.py b/nominatim/tokenizer/sanitizers/base.py index 09ea2dae..2de868c7 100644 --- a/nominatim/tokenizer/sanitizers/base.py +++ b/nominatim/tokenizer/sanitizers/base.py @@ -7,80 +7,13 @@ """ Common data types and protocols for sanitizers. 
""" -from typing import Optional, Dict, List, Mapping, Callable +from typing import Optional, List, Mapping, Callable from nominatim.tokenizer.sanitizers.config import SanitizerConfig from nominatim.data.place_info import PlaceInfo +from nominatim.data.place_name import PlaceName from nominatim.typing import Protocol, Final -class PlaceName: - """ Each name and address part of a place is encapsulated in an object of - this class. It saves not only the name proper but also describes the - kind of name with two properties: - - * `kind` describes the name of the OSM key used without any suffixes - (i.e. the part after the colon removed) - * `suffix` contains the suffix of the OSM tag, if any. The suffix - is the part of the key after the first colon. - - In addition to that, a name may have arbitrary additional attributes. - How attributes are used, depends on the sanatizers and token analysers. - The exception is is the 'analyzer' attribute. This apptribute determines - which token analysis module will be used to finalize the treatment of - names. - """ - - def __init__(self, name: str, kind: str, suffix: Optional[str]): - self.name = name - self.kind = kind - self.suffix = suffix - self.attr: Dict[str, str] = {} - - - def __repr__(self) -> str: - return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')" - - - def clone(self, name: Optional[str] = None, - kind: Optional[str] = None, - suffix: Optional[str] = None, - attr: Optional[Mapping[str, str]] = None) -> 'PlaceName': - """ Create a deep copy of the place name, optionally with the - given parameters replaced. In the attribute list only the given - keys are updated. The list is not replaced completely. - In particular, the function cannot to be used to remove an - attribute from a place name. 
- """ - newobj = PlaceName(name or self.name, - kind or self.kind, - suffix or self.suffix) - - newobj.attr.update(self.attr) - if attr: - newobj.attr.update(attr) - - return newobj - - - def set_attr(self, key: str, value: str) -> None: - """ Add the given property to the name. If the property was already - set, then the value is overwritten. - """ - self.attr[key] = value - - - def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]: - """ Return the given property or the value of 'default' if it - is not set. - """ - return self.attr.get(key, default) - - - def has_attr(self, key: str) -> bool: - """ Check if the given attribute is set. - """ - return key in self.attr - class ProcessInfo: """ Container class for information handed into to handler functions. diff --git a/nominatim/tokenizer/sanitizers/clean_housenumbers.py b/nominatim/tokenizer/sanitizers/clean_housenumbers.py index 5df057d0..417d68d2 100644 --- a/nominatim/tokenizer/sanitizers/clean_housenumbers.py +++ b/nominatim/tokenizer/sanitizers/clean_housenumbers.py @@ -27,7 +27,8 @@ Arguments: from typing import Callable, Iterator, List import re -from nominatim.tokenizer.sanitizers.base import ProcessInfo, PlaceName +from nominatim.tokenizer.sanitizers.base import ProcessInfo +from nominatim.data.place_name import PlaceName from nominatim.tokenizer.sanitizers.config import SanitizerConfig class _HousenumberSanitizer: