rendering:
heading_level: 6
+#### tag-japanese
+
+::: nominatim.tokenizer.sanitizers.tag_japanese
+ selection:
+ members: False
+ rendering:
+ heading_level: 6
+
#### Token Analysis
Token analyzers take a full name and transform it into one or more normalized
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+This sanitizer maps OSM address tags to Japanese block addresses.
+It merges block_number and housenumber into a single housenumber,
+and quarter and neighbourhood into a single place.
+"""
+
+
+from typing import Callable
+from typing import List, Optional
+
+from nominatim.tokenizer.sanitizers.base import ProcessInfo
+from nominatim.tokenizer.sanitizers.config import SanitizerConfig
+from nominatim.data.place_name import PlaceName
+
def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """Return the handler function of this sanitizer.

    The configuration argument is accepted but not read; the handler
    returned is always `tag_japanese`.
    """
    handler = tag_japanese
    return handler
+
def reconbine_housenumber(
    new_address: List[PlaceName],
    tmp_housenumber: Optional[str],
    tmp_blocknumber: Optional[str]
) -> List[PlaceName]:
    """Append a single 'housenumber' entry built from block and house number.

    If both values are non-empty the entry is named
    '<blocknumber>-<housenumber>'. If only one is non-empty, that value is
    used as is. If neither is, nothing is appended.

    `new_address` is extended in place and also returned.
    """
    if tmp_blocknumber and tmp_housenumber:
        merged = f'{tmp_blocknumber}-{tmp_housenumber}'
    else:
        # At most one part is usable here; the block number is checked first.
        merged = tmp_blocknumber or tmp_housenumber

    # None and '' both count as "no value".
    if merged:
        new_address.append(PlaceName(kind='housenumber', name=merged, suffix=''))

    return new_address
+
def reconbine_place(
    new_address: List[PlaceName],
    tmp_neighbourhood: Optional[str],
    tmp_quarter: Optional[str]
) -> List[PlaceName]:
    """Append a single 'place' entry built from quarter and neighbourhood.

    If both values are non-empty the entry is named
    '<quarter><neighbourhood>' (quarter first, no separator). If only one
    is non-empty, that value is used as is. If neither is, nothing is
    appended.

    `new_address` is extended in place and also returned.
    """
    if tmp_neighbourhood and tmp_quarter:
        merged = f'{tmp_quarter}{tmp_neighbourhood}'
    else:
        # At most one part is usable here; the neighbourhood is checked first.
        merged = tmp_neighbourhood or tmp_quarter

    # None and '' both count as "no value".
    if merged:
        new_address.append(PlaceName(kind='place', name=merged, suffix=''))

    return new_address
def tag_japanese(obj: ProcessInfo) -> None:
    """Rewrite the address parts of a Japanese place into block-address form.

    Places whose country code is not 'jp' are left untouched. Otherwise
    the 'housenumber', 'block_number', 'neighbourhood' and 'quarter'
    entries are removed from `obj.address` and replaced by at most one
    merged 'housenumber' and one merged 'place' entry, appended after the
    remaining entries. Entries whose name is None are dropped from the
    result.
    """
    if obj.place.country_code != 'jp':
        return

    merged_kinds = ('housenumber', 'block_number', 'neighbourhood', 'quarter')
    collected = {}
    kept = []
    for entry in obj.address:
        if entry.kind in merged_kinds:
            # A later entry of the same kind overwrites an earlier one.
            collected[entry.kind] = entry.name
        else:
            kept.append(entry)

    kept = reconbine_housenumber(kept,
                                 collected.get('housenumber'),
                                 collected.get('block_number'))
    kept = reconbine_place(kept,
                           collected.get('neighbourhood'),
                           collected.get('quarter'))

    obj.address = [entry for entry in kept if entry.name is not None]
-- "[๐ฅ๐ ฿๐ญ๊ค๐ฉ ๐๐๐ฐ๐ถ๊ฉ๊ แฑแญแฎฐแ แ แฅเผ เป๊งฐแแชแชแง๐ต๊ฏฐแฑ๐ฑ๐ฐ๐๐๐๊ง๊ฃเทฆ๐ฆ๏ผ๐ถ๐๐ฌ๐๐ขโโฟโชโฐ] > 0"
-- "[๐ฅ๐ก฿๐ญ๊ค๐ฉก๐๐๐ฑ๐ท๊ฉ๊กแฑแญแฎฑแ แกแฅเผกเป๊งฑแแชแชแง๐ต๊ฏฑแฑ๐ฑ๐ฑ๐๐๐๊ง๊ฃเทง๐ง๏ผ๐ท๐๐ญ๐๐ฃโยนโ โดโโถโโโต] > 1"
-- "[๐ฅ๐ข฿๐ญ๊ค๐ฉข๐๐๐ฒ๐ธ๊ฉ๊ขแฑแญแฎฒแ แขแฅเผขเป๊งฒแแชแชแง๐ต๊ฏฒแฑ๐ฑ๐ฒ๐๐๐๊ง๊ฃเทจ๐จ๏ผ๐ธ๐๐ฎ๐๐คโยฒโกโตโโทโโโถ] > 2"
-- "[๐ฅ๐ฃ฿๐ญ๊ค๐ฉฃ๐๐๐ณ๐น๊ฉ๊ฃแฑแญแฎณแ แฃแฅเผฃเป๊งณแแชแชแง๐ต๊ฏณแฑ๐ฑ๐ณ๐๐๐๊ง๊ฃเทฉ๐ฉ๏ผ๐น๐๐ฏ๐๐ฅโยณโขโถโโธโโโท] > 3"
-- "[๐ฅ๐ค฿๐ญ๊ค๐ฉค๐๐๐ด๐บ๊ฉ๊คแฑแญแฎดแ แคแฅเผคเป๊งดแแชแชแง๐ต๊ฏดแฑ๐ฑ๐ด๐๐๐๊ง๊ฃเทช๐ช๏ผ๐บ๐๐ฐ๐๐ฆโโดโฃโทโโนโโโธ] > 4"
-- "[๐ฅ๐ฅ฿
๐ญ๊ค
๐ฉฅ๐๐๐ต๐ป๊ฉ๊ฅแฑ
แญแฎตแ แฅแฅเผฅเป๊งตแแชแช
แง๐ต๊ฏตแฑ๐ฑ๐ต๐
๐๐๊ง๊ฃเทซ๐ซ๏ผ๐ป๐๐ฑ๐๐งโ
โตโคโธโโบโโโน] > 5"
-- "[๐ฅ๐ฆ฿๐ญ๊ค๐ฉฆ๐๐๐ถ๐ผ๊ฉ๊ฆแฑแญแฎถแ แฆแฅเผฆเป๊งถแแชแชแง๐ต๊ฏถแฑ๐ฑ๐ถ๐๐๐๊ง๊ฃเทฌ๐ฌ๏ผ๐ผ๐๐ฒ๐๐จโโถโฅโนโโปโ
โโบ] > 6"
-- "[๐ฅ๐ง฿๐ญ๊ค๐ฉง๐๐๐ท๐ฝ๊ฉ๊งแฑแญแฎทแ แงแฅเผงเป๊งทแแชแชแง๐ต๊ฏทแฑ๐ฑ๐ท๐๐๐๊ง๊ฃเทญ๐ญ๏ผ๐ฝ๐๐ณ๐๐ฉโโทโฆโบโโผโโโป] > 7"
-- "[๐ฅ๐จ฿๐ญ๊ค๐ฉจ๐๐๐ธ๐พ๊ฉ๊จแฑแญแฎธแ แจแฅเผจเป๊งธแแชแชแง๐ต๊ฏธแฑ๐ฑ๐ธ๐๐๐๊ง๊ฃเทฎ๐ฎ๏ผ๐พ๐ ๐ด๐๐ชโโธโงโปโโฝโโโผ] > 8"
-- "[๐ฅ๐ฉ฿๐ญ๊ค๐ฉฉ๐๐๐น๐ฟ๊ฉ๊ฉแฑแญแฎนแ แฉแฅเผฉเป๊งนแแชแชแง๐ต๊ฏนแฑ๐ฑ๐น๐๐๐๊ง๊ฃเทฏ๐ฏ๏ผ๐ฟ๐ก๐ต๐๐ซโโนโจโผโโพโโโฝ] > 9"
-- "[๐บโฉโฝโโฟโโโพ] > '10'"
+- "[๐ฅ๐ ฿๐ญ๊ค๐ฉ ๐๐๐ฐ๐ถ๊ฉ๊ แฑแญแฎฐแ แ แฅเผ เป๊งฐแแชแชแง๐ต๊ฏฐแฑ๐ฑ๐ฐ๐๐๐๊ง๊ฃเทฆ๐ฆ๏ผ๐ถ๐๐ฌ๐๐ขโโฟโชโฐ้ถ] > 0"
+- "[๐ฅ๐ก฿๐ญ๊ค๐ฉก๐๐๐ฑ๐ท๊ฉ๊กแฑแญแฎฑแ แกแฅเผกเป๊งฑแแชแชแง๐ต๊ฏฑแฑ๐ฑ๐ฑ๐๐๐๊ง๊ฃเทง๐ง๏ผ๐ท๐๐ญ๐๐ฃโยนโ โดโโถโโโตไธ] > 1"
+- "[๐ฅ๐ข฿๐ญ๊ค๐ฉข๐๐๐ฒ๐ธ๊ฉ๊ขแฑแญแฎฒแ แขแฅเผขเป๊งฒแแชแชแง๐ต๊ฏฒแฑ๐ฑ๐ฒ๐๐๐๊ง๊ฃเทจ๐จ๏ผ๐ธ๐๐ฎ๐๐คโยฒโกโตโโทโโโถไบ] > 2"
+- "[๐ฅ๐ฃ฿๐ญ๊ค๐ฉฃ๐๐๐ณ๐น๊ฉ๊ฃแฑแญแฎณแ แฃแฅเผฃเป๊งณแแชแชแง๐ต๊ฏณแฑ๐ฑ๐ณ๐๐๐๊ง๊ฃเทฉ๐ฉ๏ผ๐น๐๐ฏ๐๐ฅโยณโขโถโโธโโโทไธ] > 3"
+- "[๐ฅ๐ค฿๐ญ๊ค๐ฉค๐๐๐ด๐บ๊ฉ๊คแฑแญแฎดแ แคแฅเผคเป๊งดแแชแชแง๐ต๊ฏดแฑ๐ฑ๐ด๐๐๐๊ง๊ฃเทช๐ช๏ผ๐บ๐๐ฐ๐๐ฆโโดโฃโทโโนโโโธๅ] > 4"
+- "[๐ฅ๐ฅ฿
๐ญ๊ค
๐ฉฅ๐๐๐ต๐ป๊ฉ๊ฅแฑ
แญแฎตแ แฅแฅเผฅเป๊งตแแชแช
แง๐ต๊ฏตแฑ๐ฑ๐ต๐
๐๐๊ง๊ฃเทซ๐ซ๏ผ๐ป๐๐ฑ๐๐งโ
โตโคโธโโบโโโนไบ] > 5"
+- "[๐ฅ๐ฆ฿๐ญ๊ค๐ฉฆ๐๐๐ถ๐ผ๊ฉ๊ฆแฑแญแฎถแ แฆแฅเผฆเป๊งถแแชแชแง๐ต๊ฏถแฑ๐ฑ๐ถ๐๐๐๊ง๊ฃเทฌ๐ฌ๏ผ๐ผ๐๐ฒ๐๐จโโถโฅโนโโปโ
โโบๅ
ญ] > 6"
+- "[๐ฅ๐ง฿๐ญ๊ค๐ฉง๐๐๐ท๐ฝ๊ฉ๊งแฑแญแฎทแ แงแฅเผงเป๊งทแแชแชแง๐ต๊ฏทแฑ๐ฑ๐ท๐๐๐๊ง๊ฃเทญ๐ญ๏ผ๐ฝ๐๐ณ๐๐ฉโโทโฆโบโโผโโโปไธ] > 7"
+- "[๐ฅ๐จ฿๐ญ๊ค๐ฉจ๐๐๐ธ๐พ๊ฉ๊จแฑแญแฎธแ แจแฅเผจเป๊งธแแชแชแง๐ต๊ฏธแฑ๐ฑ๐ธ๐๐๐๊ง๊ฃเทฎ๐ฎ๏ผ๐พ๐ ๐ด๐๐ชโโธโงโปโโฝโโโผๅ
ซ] > 8"
+- "[๐ฅ๐ฉ฿๐ญ๊ค๐ฉฉ๐๐๐น๐ฟ๊ฉ๊ฉแฑแญแฎนแ แฉแฅเผฉเป๊งนแแชแชแง๐ต๊ฏนแฑ๐ฑ๐น๐๐๐๊ง๊ฃเทฏ๐ฏ๏ผ๐ฟ๐ก๐ต๐๐ซโโนโจโผโโพโโโฝไน] > 9"
+- "[๐บโฉโฝโโฟโโโพๅ] > '10'"
- "[โชโพโโซ] > '11'"
- "[โซโฟโโฌ] > '12'"
- "[โฌโโโญ] > '13'"
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
use-defaults: all
mode: append
+ - step: tag-japanese
token-analysis:
- analyzer: generic
- id: "@housenumber"
--- /dev/null
+@DB
+Feature: Searches in Japan
+ Tests specifically for searches of Japanese addresses and for searches in the Japanese language.
+ @fail-legacy
+ Scenario: A block house-number is parented to the neighbourhood
+ Given the grid with origin JP
+ | 1 | | | | 2 |
+ | | 3 | | | |
+ | | | 9 | | |
+ | | | | 6 | |
+ And the places
+ | osm | class | type | name | geometry |
+ | W1 | highway | residential | ้ๅญๆฉ้ใ | 1,2 |
+ And the places
+ | osm | class | type | housenr | addr+block_number | addr+neighbourhood | geometry |
+ | N3 | amenity | restaurant | 2 | 6 | 2ไธ็ฎ | 3 |
+ And the places
+ | osm | class | type | name | geometry |
+ | N9 | place | neighbourhood | 2ไธ็ฎ | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | N6 | place | quarter | ๅ ็ฌ | 6 |
+ When importing
+ Then placex contains
+ | object | parent_place_id |
+ | N3 | N9 |
+ When sending search query "2ไธ็ฎ 6-2"
+ Then results contain
+ | osm |
+ | N3 |
--- /dev/null
+from nominatim.data.place_info import PlaceInfo
+from nominatim.data.place_name import PlaceName
+from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
+from typing import Mapping, Optional, List
+import pytest
+
class TestTagJapanese:
    """Tests for the 'tag-japanese' sanitizer step.

    Every test feeds address parts of a place with country code 'jp'
    through the sanitizer and compares the sorted list of
    (name, kind) tuples of the resulting address.
    """

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # `def_config` is presumably a fixture supplied by the test suite's
        # conftest -- it is not defined in this module.
        self.config = def_config

    def run_sanitizer_on(self, type, **kwargs):
        """Run the sanitizer on a 'jp' place whose address is `kwargs`.

        `type` is accepted for call compatibility but is never read.
        Returns the resulting address as a sorted list of (name, kind)
        tuples.
        """
        place = PlaceInfo({
            'address': kwargs,
            'country_code': 'jp'
        })
        sanitizer_args = {'step': 'tag-japanese'}
        _, address = PlaceSanitizer([sanitizer_args], self.config).process_names(place)
        tmp_list = [(p.name, p.kind) for p in address]
        return sorted(tmp_list)

    def test_on_address(self):
        # Kinds the sanitizer does not handle pass through unchanged.
        res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
        assert res == [('bar', 'ref'), ('baz', 'ref_abc'), ('foo', 'name')]

    def test_housenumber(self):
        res = self.run_sanitizer_on('address', housenumber='2')
        assert res == [('2', 'housenumber')]

    def test_blocknumber(self):
        res = self.run_sanitizer_on('address', block_number='6')
        assert res == [('6', 'housenumber')]

    def test_neighbourhood(self):
        res = self.run_sanitizer_on('address', neighbourhood='8')
        assert res == [('8', 'place')]

    def test_quarter(self):
        res = self.run_sanitizer_on('address', quarter='kase')
        assert res == [('kase', 'place')]

    def test_housenumber_blocknumber(self):
        res = self.run_sanitizer_on('address', housenumber='2', block_number='6')
        assert res == [('6-2', 'housenumber')]

    def test_quarter_neighbourhood(self):
        res = self.run_sanitizer_on('address', quarter='kase', neighbourhood='8')
        assert res == [('kase8', 'place')]

    def test_blocknumber_housenumber_quarter(self):
        res = self.run_sanitizer_on('address', block_number='6', housenumber='2',
                                    quarter='kase')
        assert res == [('6-2', 'housenumber'), ('kase', 'place')]

    def test_blocknumber_housenumber_neighbourhood(self):
        # Renamed: the previous name also mentioned "quarter", but no
        # quarter is passed in or expected here.
        res = self.run_sanitizer_on('address', block_number='6', housenumber='2',
                                    neighbourhood='8')
        assert res == [('6-2', 'housenumber'), ('8', 'place')]

    def test_blocknumber_quarter_neighbourhood(self):
        res = self.run_sanitizer_on('address', block_number='6', quarter='kase',
                                    neighbourhood='8')
        assert res == [('6', 'housenumber'), ('kase8', 'place')]

    def test_blocknumber_quarter(self):
        res = self.run_sanitizer_on('address', block_number='6', quarter='kase')
        assert res == [('6', 'housenumber'), ('kase', 'place')]

    def test_blocknumber_neighbourhood(self):
        res = self.run_sanitizer_on('address', block_number='6', neighbourhood='8')
        assert res == [('6', 'housenumber'), ('8', 'place')]

    def test_housenumber_quarter_neighbourhood(self):
        res = self.run_sanitizer_on('address', housenumber='2', quarter='kase',
                                    neighbourhood='8')
        assert res == [('2', 'housenumber'), ('kase8', 'place')]

    def test_housenumber_quarter(self):
        res = self.run_sanitizer_on('address', housenumber='2', quarter='kase')
        assert res == [('2', 'housenumber'), ('kase', 'place')]

    def test_housenumber_blocknumber_neighbourhood_quarter(self):
        res = self.run_sanitizer_on('address', block_number='6', housenumber='2',
                                    quarter='kase', neighbourhood='8')
        assert res == [('6-2', 'housenumber'), ('kase8', 'place')]