git.openstreetmap.org Git - nominatim.git/blob - settings/icu_tokenizer.yaml
introduce sanitizer step before token analysis
[nominatim.git] / settings / icu_tokenizer.yaml
1 normalization:  # ordered list of ICU transform rules; rule order matters
2     - ":: lower ()"  # lowercase everything first
3     - ":: Hans-Hant"  # ICU Simplified-to-Traditional Chinese transform
4     - !include icu-rules/unicode-digits-to-decimal.yaml  # rules kept in a separate file; judging by the name they map Unicode digits to decimal digits
5     - "'№' > 'no'"  # the numero sign and its two spelled-out forms below all become 'no'
6     - "'n°' > 'no'"
7     - "'nº' > 'no'"
8     - "ª > a"  # ordinal indicators become plain letters
9     - "º > o"
10     - "[[:Punctuation:][:Symbol:]]  > ' '"  # punctuation and symbols are replaced by a space
11     - "ß > 'ss'" # German eszett is unambiguously equal to double ss
12     - "[^[:Letter:] [:Number:] [:Space:]] >"  # delete anything that is not a letter, number or space
13     - "[:Lm:] >"  # delete modifier letters (Unicode general category Lm)
14     - ":: [[:Number:]] Latin ()"  # this and the next two transforms are filtered to number characters only
15     - ":: [[:Number:]] Ascii ();"
16     - ":: [[:Number:]] NFD ();"
17     - "[[:Nonspacing Mark:] [:Cf:]] >;"  # delete nonspacing marks and format characters (category Cf)
18     - "[:Space:]+ > ' '"  # collapse each run of whitespace into a single space
19 transliteration:  # ordered list of ICU transform rules that reduce text to lowercase ASCII
20     - ":: Latin ()"  # transliterate to Latin script
21     - !include icu-rules/extended-unicode-to-asccii.yaml  # additional rules kept in a separate file
22     - ":: Ascii ()"  # convert to ASCII where the transform has a mapping
23     - ":: NFD ()"  # decompose so that base letters and marks are separate code points
24     - "[^[:Ascii:]] >"  # delete whatever is still not ASCII
25     - ":: lower ()"  # lowercase the result
26     - ":: NFC ()"  # recompose into canonical composed form
27 sanitizers:  # list of sanitizer steps, each selected by name via 'step'; NOTE(review): presumably run in the listed order - confirm
28     - step: split-name-list  # by its name, splits a name value that contains a list - TODO confirm against the step's implementation
29     - step: strip-brace-terms  # by its name, removes terms in braces from names - TODO confirm against the step's implementation
30 variants:  # one include file per language; the suffix looks like a two-letter language code; entries are kept in alphabetical order
31     - !include icu-rules/variants-bg.yaml
32     - !include icu-rules/variants-ca.yaml
33     - !include icu-rules/variants-cs.yaml
34     - !include icu-rules/variants-da.yaml
35     - !include icu-rules/variants-de.yaml
36     - !include icu-rules/variants-el.yaml
37     - !include icu-rules/variants-en.yaml
38     - !include icu-rules/variants-es.yaml
39     - !include icu-rules/variants-et.yaml
40     - !include icu-rules/variants-eu.yaml
41     - !include icu-rules/variants-fi.yaml
42     - !include icu-rules/variants-fr.yaml
43     - !include icu-rules/variants-gl.yaml
44     - !include icu-rules/variants-hu.yaml
45     - !include icu-rules/variants-it.yaml
46     - !include icu-rules/variants-ja.yaml
47     - !include icu-rules/variants-mg.yaml
48     - !include icu-rules/variants-ms.yaml
49     - !include icu-rules/variants-nl.yaml
50     - !include icu-rules/variants-no.yaml
51     - !include icu-rules/variants-pl.yaml
52     - !include icu-rules/variants-pt.yaml
53     - !include icu-rules/variants-ro.yaml
54     - !include icu-rules/variants-ru.yaml
55     - !include icu-rules/variants-sk.yaml
56     - !include icu-rules/variants-sl.yaml
57     - !include icu-rules/variants-sv.yaml
58     - !include icu-rules/variants-tr.yaml
59     - !include icu-rules/variants-uk.yaml
60     - !include icu-rules/variants-vi.yaml