git.openstreetmap.org Git - nominatim.git/commitdiff
complete documentation for new clean-housenumbers sanitizer
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 20 Jan 2022 14:49:32 +0000 (15:49 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 20 Jan 2022 14:49:32 +0000 (15:49 +0100)
docs/customize/Tokenizers.md
nominatim/tokenizer/sanitizers/clean_housenumbers.py
settings/icu_tokenizer.yaml

index 5c766f50a3bf47055c41bcdbd39fd2f87d30e598..f75bc6a5c9da88fad8b92bc046337d2fcebff863 100644 (file)
@@ -181,6 +181,13 @@ The following is a list of sanitizers that are shipped with Nominatim.
     rendering:
         heading_level: 6
 
+##### clean-housenumbers
+
+::: nominatim.tokenizer.sanitizers.clean_housenumbers
+    selection:
+        members: False
+    rendering:
+        heading_level: 6
 
 
 #### Token Analysis
index 9777a7fcdcc6dc74103f727644cb8da5f81beaa9..85af903bedb4b63831ef0cd6a4c8b487bdc85e58 100644 (file)
@@ -5,7 +5,11 @@
 # Copyright (C) 2022 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
-Sanitizer that cleans and normalizes house numbers.
+Sanitizer that preprocesses address tags for house numbers. The sanitizer
+allows you to
+
+* define which tags are to be considered house numbers (see 'filter-kind')
+* split house number lists into individual numbers (see 'delimiters')
 
 Arguments:
     delimiters: Define the set of characters to be used for
index d00cffb9ce9f9887d078cc3215230c79bb66b1d5..bf51f56344e12ae4e6986d8fc60683fa77a8d002 100644 (file)
@@ -28,6 +28,10 @@ sanitizers:
     - step: split-name-list
     - step: strip-brace-terms
     - step: clean-housenumbers
+      filter-kind:
+        - housenumber
+        - conscriptionnumber
+        - streetnumber
     - step: tag-analyzer-by-language
       filter-kind: [".*name.*"]
       whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]