Merge pull request #3510 from lonvia/indexing-precompute-count

[nominatim.git] / docs / customize / Tokenizers.md
diff --git a/docs/customize/Tokenizers.md b/docs/customize/Tokenizers.md

index 4a9d77bd6c1633344c7cc5e394f401047b7daf72..49e86a5009289cea7f12aea36202abbda1548737 100644 (file)
--- a/docs/customize/Tokenizers.md
+++ b/docs/customize/Tokenizers.md
@@ -17,6 +17,11 @@ they can be configured.
  
  ## Legacy tokenizer
  
  
  ## Legacy tokenizer
  
+!!! danger
+    The Legacy tokenizer is deprecated and will be removed in Nominatim 5.0.
+    If you still use a database with the legacy tokenizer, you must reimport
+    it using the ICU tokenizer below.
+
  The legacy tokenizer implements the analysis algorithms of older Nominatim
  versions. It uses a special PostgreSQL module to normalize names and queries.
  This tokenizer is automatically installed and used when upgrading an older
  The legacy tokenizer implements the analysis algorithms of older Nominatim
  versions. It uses a special PostgreSQL module to normalize names and queries.
  This tokenizer is automatically installed and used when upgrading an older
@@ -52,7 +57,7 @@ NOMINATIM_DATABASE_MODULE_PATH=<path to directory where nominatim.so resides>
  ```
  
  This is particularly useful when the database runs on a different server.
  ```
  
  This is particularly useful when the database runs on a different server.
-See [Advanced installations](../admin/Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
+See [Advanced installations](../admin/Advanced-Installations.md#using-an-external-postgresql-database) for details.
  
  There are no other configuration options for the legacy tokenizer. All
  normalization functions are hard-coded.
  
  There are no other configuration options for the legacy tokenizer. All
  normalization functions are hard-coded.
@@ -175,73 +180,65 @@ The following is a list of sanitizers that are shipped with Nominatim.
  
  ##### split-name-list
  
  
  ##### split-name-list
  
-::: nominatim.tokenizer.sanitizers.split_name_list
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.split_name_list
      options:
      options:
+        members: False
          heading_level: 6
          docstring_section_style: spacy
  
  ##### strip-brace-terms
  
          heading_level: 6
          docstring_section_style: spacy
  
  ##### strip-brace-terms
  
-::: nominatim.tokenizer.sanitizers.strip_brace_terms
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.strip_brace_terms
      options:
      options:
+        members: False
          heading_level: 6
          docstring_section_style: spacy
  
  ##### tag-analyzer-by-language
  
          heading_level: 6
          docstring_section_style: spacy
  
  ##### tag-analyzer-by-language
  
-::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.tag_analyzer_by_language
      options:
      options:
+        members: False
          heading_level: 6
          docstring_section_style: spacy
  
  ##### clean-housenumbers
  
          heading_level: 6
          docstring_section_style: spacy
  
  ##### clean-housenumbers
  
-::: nominatim.tokenizer.sanitizers.clean_housenumbers
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.clean_housenumbers
      options:
      options:
+        members: False
          heading_level: 6
          docstring_section_style: spacy
  
  ##### clean-postcodes
  
          heading_level: 6
          docstring_section_style: spacy
  
  ##### clean-postcodes
  
-::: nominatim.tokenizer.sanitizers.clean_postcodes
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.clean_postcodes
      options:
      options:
+        members: False
          heading_level: 6
          docstring_section_style: spacy
  
  ##### clean-tiger-tags
  
          heading_level: 6
          docstring_section_style: spacy
  
  ##### clean-tiger-tags
  
-::: nominatim.tokenizer.sanitizers.clean_tiger_tags
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.clean_tiger_tags
      options:
      options:
+        members: False
          heading_level: 6
          docstring_section_style: spacy
  
  ##### delete-tags
  
          heading_level: 6
          docstring_section_style: spacy
  
  ##### delete-tags
  
-::: nominatim.tokenizer.sanitizers.delete_tags
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.delete_tags
      options:
      options:
+        members: False
          heading_level: 6
          docstring_section_style: spacy
  
  ##### tag-japanese
  
          heading_level: 6
          docstring_section_style: spacy
  
  ##### tag-japanese
  
-::: nominatim.tokenizer.sanitizers.tag_japanese
-    selection:
-        members: False
+::: nominatim_db.tokenizer.sanitizers.tag_japanese
      options:
      options:
+        members: False
          heading_level: 6
          docstring_section_style: spacy
  
          heading_level: 6
          docstring_section_style: spacy
  
@@ -402,7 +399,7 @@ The analyzer cannot be customized.
  ##### Postcode token analyzer
  
  The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
  ##### Postcode token analyzer
  
  The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
-a 'lookup' varaint of the token, which produces variants with optional
+a 'lookup' variant of the token, which produces variants with optional
  spaces. Use together with the clean-postcodes sanitizer.
  
  The analyzer cannot be customized.
  spaces. Use together with the clean-postcodes sanitizer.
  
  The analyzer cannot be customized.