]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/token_analysis/generic.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / tokenizer / token_analysis / generic.py
index a5c9f4937a859cec94bb244064fa50afb13fe664..3de915ba5254e1859976dd7e9842247df5a58b98 100644 (file)
@@ -38,7 +38,7 @@ def configure(rules, normalization_rules):
             raise UsageError("Missing field 'replacements' in mutation configuration.")
         if not isinstance(rule['replacements'], list):
             raise UsageError("Field 'replacements' in mutation configuration "
             raise UsageError("Missing field 'replacements' in mutation configuration.")
         if not isinstance(rule['replacements'], list):
             raise UsageError("Field 'replacements' in mutation configuration "
-                                 "must be a list of texts.")
+                             "must be a list of texts.")
 
         config['mutations'].append((rule['pattern'], rule['replacements']))
 
 
         config['mutations'].append((rule['pattern'], rule['replacements']))
 
@@ -47,10 +47,10 @@ def configure(rules, normalization_rules):
 
 ### Analysis section
 
 
 ### Analysis section
 
-def create(transliterator, config):
+def create(normalizer, transliterator, config):
     """ Create a new token analysis instance for this module.
     """
     """ Create a new token analysis instance for this module.
     """
-    return GenericTokenAnalysis(transliterator, config)
+    return GenericTokenAnalysis(normalizer, transliterator, config)
 
 
 class GenericTokenAnalysis:
 
 
 class GenericTokenAnalysis:
@@ -58,7 +58,8 @@ class GenericTokenAnalysis:
         and provides the functions to apply the transformations.
     """
 
         and provides the functions to apply the transformations.
     """
 
-    def __init__(self, to_ascii, config):
+    def __init__(self, norm, to_ascii, config):
+        self.norm = norm
         self.to_ascii = to_ascii
         self.variant_only = config['variant_only']
 
         self.to_ascii = to_ascii
         self.variant_only = config['variant_only']
 
@@ -74,6 +75,13 @@ class GenericTokenAnalysis:
         self.mutations = [MutationVariantGenerator(*cfg) for cfg in config['mutations']]
 
 
         self.mutations = [MutationVariantGenerator(*cfg) for cfg in config['mutations']]
 
 
+    def normalize(self, name):
+        """ Return the normalized form of the name. This is the standard form
+            from which possible variants for the name can be derived.
+        """
+        return self.norm.transliterate(name).strip()
+
+
     def get_variants_ascii(self, norm_name):
         """ Compute the spelling variants for the given normalized name
             and transliterate the result.
     def get_variants_ascii(self, norm_name):
         """ Compute the spelling variants for the given normalized name
             and transliterate the result.