]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/tag_japanese.py
add japanese sanitizer
[nominatim.git] / nominatim / tokenizer / sanitizers / tag_japanese.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
This sanitizer maps OSM data to Japanese block addresses.
It merges block_number and housenumber into a single housenumber,
and quarter and neighbourhood into a single place.
Kanji digits in names and address parts are converted to Arabic digits.
11 """
12
13
from typing import Callable
from typing import List
from typing import Optional

from nominatim.tokenizer.sanitizers.base import ProcessInfo
from nominatim.tokenizer.sanitizers.config import SanitizerConfig
from nominatim.data.place_name import PlaceName
20
def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """Set up the sanitizer.

    The configuration argument is not used; the function returned is
    always `tag_japanese`.
    """
    return tag_japanese
27
def convert_kanji_sequence_to_number(sequence: str) -> str:
    """Replace Kanji digits in a string with Arabic digits.

    Each of the ten characters 零 一 二 三 四 五 六 七 八 九 is substituted
    one-for-one by '0' to '9'. Every other character is copied through
    unchanged, so characters such as 十 stay as they are.
    """
    digit_table = str.maketrans('零一二三四五六七八九', '0123456789')
    return sequence.translate(digit_table)
54
def reconbine_housenumber(
    new_address: List[PlaceName],
    tmp_housenumber: Optional[str],
    tmp_blocknumber: Optional[str]
) -> List[PlaceName]:
    """ Recombine the tag of housenumber by using housenumber and blocknumber

        When both values are set, a single 'housenumber' entry named
        '<blocknumber>-<housenumber>' is appended. When only one of them
        is set, that value is used on its own. When neither is set
        (None or empty string), nothing is appended.

        The list passed as new_address is extended in place and is also
        the return value.
    """
    # The parameters are annotated with Optional[str] rather than
    # 'str | None': annotations are evaluated when the function is defined
    # and the '|' operator on types only exists from Python 3.10 on.
    parts = [part for part in (tmp_blocknumber, tmp_housenumber) if part]
    if parts:
        new_address.append(
            PlaceName(kind='housenumber', name='-'.join(parts), suffix='')
        )
    return new_address
87
def reconbine_place(
    new_address: List[PlaceName],
    tmp_neighbourhood: Optional[str],
    tmp_quarter: Optional[str]
) -> List[PlaceName]:
    """ Recombine the tag of place by using neighbourhood and quarter

        When both values are set, a single 'place' entry is appended whose
        name is the quarter directly followed by the neighbourhood, with
        no separator. When only one of them is set, that value is used on
        its own. When neither is set (None or empty string), nothing is
        appended.

        The list passed as new_address is extended in place and is also
        the return value.
    """
    # The parameters are annotated with Optional[str] rather than
    # 'str | None': annotations are evaluated when the function is defined
    # and the '|' operator on types only exists from Python 3.10 on.
    parts = [part for part in (tmp_quarter, tmp_neighbourhood) if part]
    if parts:
        new_address.append(
            PlaceName(kind='place', name=''.join(parts), suffix='')
        )
    return new_address
def tag_japanese(obj: ProcessInfo) -> None:
    """Rewrite names and address parts of a Japanese place.

    Places whose country code is not 'jp' are left untouched. Otherwise
    Kanji digits are converted to Arabic digits in every name and every
    address part. The address parts of kind 'housenumber' and
    'block_number' are then merged into one 'housenumber' entry, and
    those of kind 'quarter' and 'neighbourhood' into one 'place' entry.
    All other address parts are kept in their original order, followed
    by the merged entries.
    """
    if obj.place.country_code != 'jp':
        return

    for name in obj.names:
        name.name = convert_kanji_sequence_to_number(name.name)

    # Value of each kind that takes part in merging; None until seen.
    # If a kind occurs more than once, the last occurrence wins.
    merged = dict.fromkeys(
        ('housenumber', 'block_number', 'neighbourhood', 'quarter'))

    kept = []
    for part in obj.address:
        part.name = convert_kanji_sequence_to_number(part.name)
        if part.kind in merged:
            merged[part.kind] = part.name
        else:
            kept.append(part)

    kept = reconbine_housenumber(
        kept, merged['housenumber'], merged['block_number'])
    kept = reconbine_place(
        kept, merged['neighbourhood'], merged['quarter'])

    obj.address = [part for part in kept if part.name is not None]