X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8413075249e1bb2832df4edd0f66d61f77fb9f99..656c1291b15d7f81f87768a7a1eafb7f2d0223d5:/settings/legacy_icu_tokenizer.yaml diff --git a/settings/legacy_icu_tokenizer.yaml b/settings/legacy_icu_tokenizer.yaml index 34cd8b0b..5fd30bd8 100644 --- a/settings/legacy_icu_tokenizer.yaml +++ b/settings/legacy_icu_tokenizer.yaml @@ -1,116 +1,56 @@ normalization: - - ":: NFD ()" - - "[[:Nonspacing Mark:] [:Cf:]] >" - ":: lower ()" + - !include icu-rules/unicode-digits-to-decimal.yaml + - "'№' > 'no'" + - "'n°' > 'no'" + - "'nº' > 'no'" + - "ª > a" + - "º > o" + - "[[:Punctuation:][:Symbol:]] > ' '" - "ß > 'ss'" # German szet is unimbigiously equal to double ss - - "[[:Punctuation:][:Space:]]+ > ' '" + - "[^[:Letter:] [:Number:] [:Space:]] >" + - "[:Lm:] >" + - ":: [[:Number:]] Latin ()" + - ":: [[:Number:]] Ascii ();" + - ":: [[:Number:]] NFD ();" + - "[[:Nonspacing Mark:] [:Cf:]] >;" + - "[:Space:]+ > ' '" +transliteration: + - ":: Latin ()" + - !include icu-rules/extended-unicode-to-asccii.yaml + - ":: Ascii ()" + - ":: NFD ()" + - "[^[:Ascii:]] >" + - ":: lower ()" - ":: NFC ()" -transliteration: icu_transliteration.rules -compound_suffixes: - # Danish - - hal - - hallen - - hallerne - # German - - berg - - brücke - - fabrik - - gasse - - graben - - haus - - höhle - - hütte - - kapelle - - kogel - - pfad - - platz - - quelle - - spitze - - stiege - - strasse - - teich - - universität - - wald - - weg - - wiese - # Dutch - - gracht - - laan - - markt - - plein - - straat - - vliet - - weg - # Norwegian - - vei - - veien - - veg - - vegen - - gate - - gaten - - gata - - plass - - plassen - - sving - - svingen - # Finnish - - alue - - asema - - aukio - - kaari - - katu - - kuja - - kylä - - penger - - polku - - puistikko - - puisto - - raitti - - ranta - - rinne - - taival - - tie - - tori - - väylä - # Swedish - - väg - - vägen - - gatan - - gata - - gränd - - gränden - - stig - - stigen - - plats - - platsen -abbreviations: - # German - - am => a - - an der => a d - - allgemeines krankenhaus => akh - - altstoffsammelzentrum => asz - - auf der => a d - - bach => b - - bad => b - - bahnhof => bhf,bf - - berg => bg - - bezirk => bez - - brücke => br - - burg => bg - - chaussee => ch - - deutsche,deutscher,deutsches => dt - - dorf => df - - doktor => dr - - fachhochschule => fh - - Freiwillige Feuerwehr => ff - - sankt => st - - strasse => str - - weg => wg - # English - - alley => al - - beach => bch - - street => st - - road => rd - - bridge => brdg - - +variants: + - !include icu-rules/variants-bg.yaml + - !include icu-rules/variants-ca.yaml + - !include icu-rules/variants-cs.yaml + - !include icu-rules/variants-da.yaml + - !include icu-rules/variants-de.yaml + - !include icu-rules/variants-el.yaml + - !include icu-rules/variants-en.yaml + - !include icu-rules/variants-es.yaml + - !include icu-rules/variants-et.yaml + - !include icu-rules/variants-eu.yaml + - !include icu-rules/variants-fi.yaml + - !include icu-rules/variants-fr.yaml + - !include icu-rules/variants-gl.yaml + - !include icu-rules/variants-hu.yaml + - !include icu-rules/variants-it.yaml + - !include icu-rules/variants-ja.yaml + - !include icu-rules/variants-mg.yaml + - !include icu-rules/variants-ms.yaml + - !include icu-rules/variants-nl.yaml + - !include icu-rules/variants-no.yaml + - !include icu-rules/variants-pl.yaml + - !include icu-rules/variants-pt.yaml + - !include icu-rules/variants-ro.yaml + - !include icu-rules/variants-ru.yaml + - !include icu-rules/variants-sk.yaml + - !include icu-rules/variants-sl.yaml + - !include icu-rules/variants-sv.yaml + - !include icu-rules/variants-tr.yaml + - !include icu-rules/variants-uk.yaml + - !include icu-rules/variants-vi.yaml