]> git.openstreetmap.org Git - nominatim.git/blobdiff - docs/customize/Tokenizers.md
Merge pull request #3510 from lonvia/indexing-precompute-count
[nominatim.git] / docs / customize / Tokenizers.md
index 4a9d77bd6c1633344c7cc5e394f401047b7daf72..49e86a5009289cea7f12aea36202abbda1548737 100644 (file)
@@ -17,6 +17,11 @@ they can be configured.
 
 ## Legacy tokenizer
 
 
 ## Legacy tokenizer
 
+!!! danger
+    The Legacy tokenizer is deprecated and will be removed in Nominatim 5.0.
+    If you still use a database with the legacy tokenizer, you must reimport
+    it using the ICU tokenizer below.
+
 The legacy tokenizer implements the analysis algorithms of older Nominatim
 versions. It uses a special PostgreSQL module to normalize names and queries.
 This tokenizer is automatically installed and used when upgrading an older
 The legacy tokenizer implements the analysis algorithms of older Nominatim
 versions. It uses a special PostgreSQL module to normalize names and queries.
 This tokenizer is automatically installed and used when upgrading an older
@@ -52,7 +57,7 @@ NOMINATIM_DATABASE_MODULE_PATH=<path to directory where nominatim.so resides>
 ```
 
 This is in particular useful when the database runs on a different server.
 ```
 
 This is in particular useful when the database runs on a different server.
-See [Advanced installations](../admin/Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
+See [Advanced installations](../admin/Advanced-Installations.md#using-an-external-postgresql-database) for details.
 
 There are no other configuration options for the legacy tokenizer. All
 normalization functions are hard-coded.
 
 There are no other configuration options for the legacy tokenizer. All
 normalization functions are hard-coded.
@@ -175,73 +180,65 @@ The following is a list of sanitizers that are shipped with Nominatim.
 
 ##### split-name-list
 
 
 ##### split-name-list
 
-::: nominatim.tokenizer.sanitizers.split_name_list
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.split_name_list
     options:
     options:
+        members: False
         heading_level: 6
         docstring_section_style: spacy
 
 ##### strip-brace-terms
 
         heading_level: 6
         docstring_section_style: spacy
 
 ##### strip-brace-terms
 
-::: nominatim.tokenizer.sanitizers.strip_brace_terms
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.strip_brace_terms
     options:
     options:
+        members: False
         heading_level: 6
         docstring_section_style: spacy
 
 ##### tag-analyzer-by-language
 
         heading_level: 6
         docstring_section_style: spacy
 
 ##### tag-analyzer-by-language
 
-::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.tag_analyzer_by_language
     options:
     options:
+        members: False
         heading_level: 6
         docstring_section_style: spacy
 
 ##### clean-housenumbers
 
         heading_level: 6
         docstring_section_style: spacy
 
 ##### clean-housenumbers
 
-::: nominatim.tokenizer.sanitizers.clean_housenumbers
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.clean_housenumbers
     options:
     options:
+        members: False
         heading_level: 6
         docstring_section_style: spacy
 
 ##### clean-postcodes
 
         heading_level: 6
         docstring_section_style: spacy
 
 ##### clean-postcodes
 
-::: nominatim.tokenizer.sanitizers.clean_postcodes
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.clean_postcodes
     options:
     options:
+        members: False
         heading_level: 6
         docstring_section_style: spacy
 
 ##### clean-tiger-tags
 
         heading_level: 6
         docstring_section_style: spacy
 
 ##### clean-tiger-tags
 
-::: nominatim.tokenizer.sanitizers.clean_tiger_tags
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.clean_tiger_tags
     options:
     options:
+        members: False
         heading_level: 6
         docstring_section_style: spacy
 
 #### delete-tags
 
         heading_level: 6
         docstring_section_style: spacy
 
 #### delete-tags
 
-::: nominatim.tokenizer.sanitizers.delete_tags
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.delete_tags
     options:
     options:
+        members: False
         heading_level: 6
         docstring_section_style: spacy
 
 #### tag-japanese
 
         heading_level: 6
         docstring_section_style: spacy
 
 #### tag-japanese
 
-::: nominatim.tokenizer.sanitizers.tag_japanese
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.tag_japanese
     options:
     options:
+        members: False
         heading_level: 6
         docstring_section_style: spacy
 
         heading_level: 6
         docstring_section_style: spacy
 
@@ -402,7 +399,7 @@ The analyzer cannot be customized.
 ##### Postcode token analyzer
 
 The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
 ##### Postcode token analyzer
 
 The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
-a 'lookup' varaint of the token, which produces variants with optional
+a 'lookup' variant of the token, which produces variants with optional
 spaces. Use together with the clean-postcodes sanitizer.
 
 The analyzer cannot be customized.
 spaces. Use together with the clean-postcodes sanitizer.
 
 The analyzer cannot be customized.