]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/tag_japanese.py
reintroduce cutoffs when searching for very frequent words
[nominatim.git] / nominatim / tokenizer / sanitizers / tag_japanese.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
This sanitizer maps OSM data to Japanese block addresses.
It merges block_number and housenumber into a single housenumber,
and quarter and neighbourhood into a single place.
11 """
12
13
14 from typing import Callable
15 from typing import List, Optional
16
17 from nominatim.tokenizer.sanitizers.base import ProcessInfo
18 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
19 from nominatim.data.place_name import PlaceName
20
def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """Create the sanitizer callable for Japanese block addresses.

    The configuration argument is accepted to match the sanitizer
    factory signature but is not used.
    """
    sanitizer = tag_japanese
    return sanitizer
25
def reconbine_housenumber(
    new_address: List[PlaceName],
    tmp_housenumber: Optional[str],
    tmp_blocknumber: Optional[str]
) -> List[PlaceName]:
    """ Append one housenumber entry built from block and house number.

        When both values are set, they are joined as '<block>-<house>'.
        When only one is set, it is used on its own. When neither is
        set, nothing is appended. The list is extended in place and
        also returned.
    """
    if tmp_blocknumber and tmp_housenumber:
        combined = f'{tmp_blocknumber}-{tmp_housenumber}'
    else:
        # At most one value is usable; prefer the block number.
        combined = tmp_blocknumber or tmp_housenumber
        if not combined:
            return new_address

    entry = PlaceName(kind='housenumber', name=combined, suffix='')
    new_address.append(entry)
    return new_address
58
def reconbine_place(
    new_address: List[PlaceName],
    tmp_neighbourhood: Optional[str],
    tmp_quarter: Optional[str]
) -> List[PlaceName]:
    """ Append one place entry built from quarter and neighbourhood.

        When both values are set, the quarter is written directly in
        front of the neighbourhood with no separator. When only one is
        set, it is used on its own. When neither is set, nothing is
        appended. The list is extended in place and also returned.
    """
    if tmp_neighbourhood and tmp_quarter:
        combined = f'{tmp_quarter}{tmp_neighbourhood}'
    else:
        # At most one value is usable; prefer the neighbourhood.
        combined = tmp_neighbourhood or tmp_quarter
        if not combined:
            return new_address

    entry = PlaceName(kind='place', name=combined, suffix='')
    new_address.append(entry)
    return new_address
def tag_japanese(obj: ProcessInfo) -> None:
    """Rewrite the address parts of a Japanese place as a block address.

    Places whose country code is not 'jp' are left untouched. Otherwise
    the housenumber and block_number entries are merged into a single
    housenumber entry, and the neighbourhood and quarter entries into a
    single place entry. All other entries keep their relative order and
    come before the merged ones. Entries whose name is None are dropped.
    """
    if obj.place.country_code != 'jp':
        return

    # Last seen name for each address kind that takes part in a merge.
    merged = dict.fromkeys(
        ('housenumber', 'block_number', 'neighbourhood', 'quarter')
    )

    kept: List[PlaceName] = []
    for part in obj.address:
        if part.kind in merged:
            merged[part.kind] = part.name
        else:
            kept.append(part)

    kept = reconbine_housenumber(kept, merged['housenumber'], merged['block_number'])
    kept = reconbine_place(kept, merged['neighbourhood'], merged['quarter'])

    obj.address = [part for part in kept if part.name is not None]