git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #3555 from IvanShift/patch-1
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 17 Feb 2025 17:44:11 +0000 (18:44 +0100)
committer GitHub <noreply@github.com>
Mon, 17 Feb 2025 17:44:11 +0000 (18:44 +0100)
Fixed Russian abbreviation list

1  2 
settings/icu_tokenizer.yaml

index 6cf30d59e55b961d32ae7fc1b9a08bf9118d1c62,cbbd1effba4a3f7cb000b9b4d8bec55ddf025723..a6545bdd1ca7ff9244496aa24ad9a314cb1dfe67
@@@ -1,6 -1,3 +1,6 @@@
 +query-preprocessing:
 +    - step: split_japanese_phrases
 +    - step: normalize
  normalization:
      - ":: lower ()"
      - ":: Hans-Hant"
      - "'nº' > 'no'"
      - "ª > a"
      - "º > o"
 -    - "[[:Punctuation:][:Symbol:]\u02bc]  > ' '"
 +    - "[[:Punctuation:][:Symbol:][\u02bc] - [-:]]+  > '-'"
      - "ß > 'ss'" # German szet is unambiguously equal to double ss
 -    - "[^[:alnum:] [:Canonical_Combining_Class=Virama:] [:Space:]] >"
 +    - "[^[:alnum:] [:Canonical_Combining_Class=Virama:] [:Space:] [-:]] >"
      - "[:Lm:] >"
      - ":: [[:Number:]] Latin ()"
      - ":: [[:Number:]] Ascii ();"
      - ":: [[:Number:]] NFD ();"
      - "[[:Nonspacing Mark:] [:Cf:]] >;"
 -    - "[:Space:]+ > ' '"
 +    - "[-:]?[:Space:]+[-:]? > ' '"
  transliteration:
 +    - "[-:]  > ' '"
      - ":: Latin ()"
      - !include icu-rules/extended-unicode-to-asccii.yaml
      - ":: Ascii ()"
@@@ -28,7 -24,6 +28,7 @@@
      - ":: lower ()"
      - "[^a-z0-9[:Space:]] >"
      - ":: NFC ()"
 +    - "[:Space:]+ > ' '"
  sanitizers:
      - step: clean-housenumbers
        filter-kind:
      - step: clean-postcodes
        convert-to-address: yes
        default-pattern: "[A-Z0-9- ]{3,12}"
 +    - step: clean-tiger-tags
      - step: split-name-list
 +      delimiters: ;
      - step: strip-brace-terms
      - step: tag-analyzer-by-language
        filter-kind: [".*name.*"]
        whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
        use-defaults: all
        mode: append
 +    - step: tag-japanese
  token-analysis:
      - analyzer: generic
      - id: "@housenumber"
        mode: variant-only
        variants:
            - !include icu-rules/variants-ru.yaml
+       mutations:
+           - pattern: ё
+             replacements: ["ё", "е"]
      - id: sk
        analyzer: generic
        mode: variant-only