git.openstreetmap.org Git - nominatim.git/blobdiff - docs/customize/Tokenizers.md
remove website setup
[nominatim.git] / docs / customize / Tokenizers.md
index 6199ea4252469537a0c3953415cff05795735cdd..49e86a5009289cea7f12aea36202abbda1548737 100644 (file)
@@ -17,6 +17,11 @@ they can be configured.
 
 ## Legacy tokenizer
 
 
 ## Legacy tokenizer
 
+!!! danger
+    The Legacy tokenizer is deprecated and will be removed in Nominatim 5.0.
+    If you still use a database with the legacy tokenizer, you must reimport
+    it using the ICU tokenizer below.
+
 The legacy tokenizer implements the analysis algorithms of older Nominatim
 versions. It uses a special PostgreSQL module to normalize names and queries.
 This tokenizer is automatically installed and used when upgrading an older
 The legacy tokenizer implements the analysis algorithms of older Nominatim
 versions. It uses a special PostgreSQL module to normalize names and queries.
 This tokenizer is automatically installed and used when upgrading an older
@@ -52,7 +57,7 @@ NOMINATIM_DATABASE_MODULE_PATH=<path to directory where nominatim.so resides>
 ```
 
 This is in particular useful when the database runs on a different server.
 ```
 
 This is in particular useful when the database runs on a different server.
-See [Advanced installations](../admin/Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
+See [Advanced installations](../admin/Advanced-Installations.md#using-an-external-postgresql-database) for details.
 
 There are no other configuration options for the legacy tokenizer. All
 normalization functions are hard-coded.
 
 There are no other configuration options for the legacy tokenizer. All
 normalization functions are hard-coded.
@@ -175,67 +180,67 @@ The following is a list of sanitizers that are shipped with Nominatim.
 
 ##### split-name-list
 
 
 ##### split-name-list
 
-::: nominatim.tokenizer.sanitizers.split_name_list
-    selection:
+::: nominatim_db.tokenizer.sanitizers.split_name_list
+    options:
         members: False
         members: False
-    rendering:
         heading_level: 6
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### strip-brace-terms
 
 
 ##### strip-brace-terms
 
-::: nominatim.tokenizer.sanitizers.strip_brace_terms
-    selection:
+::: nominatim_db.tokenizer.sanitizers.strip_brace_terms
+    options:
         members: False
         members: False
-    rendering:
         heading_level: 6
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### tag-analyzer-by-language
 
 
 ##### tag-analyzer-by-language
 
-::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
-    selection:
+::: nominatim_db.tokenizer.sanitizers.tag_analyzer_by_language
+    options:
         members: False
         members: False
-    rendering:
         heading_level: 6
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-housenumbers
 
 
 ##### clean-housenumbers
 
-::: nominatim.tokenizer.sanitizers.clean_housenumbers
-    selection:
+::: nominatim_db.tokenizer.sanitizers.clean_housenumbers
+    options:
         members: False
         members: False
-    rendering:
         heading_level: 6
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-postcodes
 
 
 ##### clean-postcodes
 
-::: nominatim.tokenizer.sanitizers.clean_postcodes
-    selection:
+::: nominatim_db.tokenizer.sanitizers.clean_postcodes
+    options:
         members: False
         members: False
-    rendering:
         heading_level: 6
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-tiger-tags
 
 
 ##### clean-tiger-tags
 
-::: nominatim.tokenizer.sanitizers.clean_tiger_tags
-    selection:
+::: nominatim_db.tokenizer.sanitizers.clean_tiger_tags
+    options:
         members: False
         members: False
-    rendering:
         heading_level: 6
         heading_level: 6
+        docstring_section_style: spacy
 
 #### delete-tags
 
 
 #### delete-tags
 
-::: nominatim.tokenizer.sanitizers.delete_tags
-    selection:
+::: nominatim_db.tokenizer.sanitizers.delete_tags
+    options:
         members: False
         members: False
-    rendering:
         heading_level: 6
         heading_level: 6
+        docstring_section_style: spacy
 
 #### tag-japanese
 
 
 #### tag-japanese
 
-::: nominatim.tokenizer.sanitizers.tag_japanese
-    selection:
+::: nominatim_db.tokenizer.sanitizers.tag_japanese
+    options:
         members: False
         members: False
-    rendering:
         heading_level: 6
         heading_level: 6
+        docstring_section_style: spacy
 
 #### Token Analysis
 
 
 #### Token Analysis
 
@@ -394,7 +399,7 @@ The analyzer cannot be customized.
 ##### Postcode token analyzer
 
 The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
 ##### Postcode token analyzer
 
 The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
-a 'lookup' varaint of the token, which produces variants with optional
+a 'lookup' variant of the token, which produces variants with optional
 spaces. Use together with the clean-postcodes sanitizer.
 
 The analyzer cannot be customized.
 spaces. Use together with the clean-postcodes sanitizer.
 
 The analyzer cannot be customized.