git.openstreetmap.org Git - nominatim.git/blob - nominatim/indexer/runners.py
postcodes: strip leading country codes
[nominatim.git] / nominatim / indexer / runners.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Mix-ins that provide the actual commands for the indexer for various indexing
9 tasks.
10 """
11 import functools
12
13 from psycopg2 import sql as pysql
14
15 from nominatim.indexer.place_info import PlaceInfo
16
17 # pylint: disable=C0111
18
def _mk_valuelist(template, num):
    """ Build the comma-separated row list for a SQL VALUES clause.

        `template` is the raw SQL text of a single row. It is wrapped
        into an SQL object, repeated `num` times and the copies are
        joined with ','.
    """
    separator = pysql.SQL(',')
    row = pysql.SQL(template)
    return separator.join([row for _ in range(num)])
21
22
class AbstractPlacexRunner:
    """ Shared base for runners that index rows of the placex table.

        Provides the SQL for requesting the indexing details of places
        and for writing the indexing result back. The statements that
        count and select the rows to work on are left to the subclasses.
    """
    SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
    UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)"

    def __init__(self, rank, analyzer):
        """ Remember the rank to work on and the analyzer that is handed
            to PlaceInfo.analyze() for each place.
        """
        self.rank = rank
        self.analyzer = analyzer


    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Return the UPDATE statement that writes back `num_places` rows.

            Only the statement built last is kept in the cache.
        """
        statement = pysql.SQL(
            """ UPDATE placex
                SET indexed_status = 0, address = v.addr, token_info = v.ti,
                    name = v.name, linked_place_id = v.linked_place_id
                FROM (VALUES {}) as v(id, name, addr, linked_place_id, ti)
                WHERE place_id = v.id
            """)
        value_rows = _mk_valuelist(AbstractPlacexRunner.UPDATE_LINE, num_places)
        return statement.format(value_rows)


    @staticmethod
    def get_place_details(worker, ids):
        """ Send the query for the indexing details of places to `worker`.

            The place id is read from the first element of each entry
            in `ids`.
        """
        place_ids = tuple(entry[0] for entry in ids)
        worker.perform("""SELECT place_id, extra.*
                          FROM placex, LATERAL placex_indexing_prepare(placex) as extra
                          WHERE place_id IN %s""",
                       (place_ids, ))


    def index_places(self, worker, places):
        """ Send the UPDATE for all `places` to `worker`.

            Each place contributes its place_id, name, address and
            linked_place_id followed by the result of analyzing the place
            with the runner's analyzer.
        """
        columns = ('place_id', 'name', 'address', 'linked_place_id')
        params = []
        for place in places:
            params.extend(place[column] for column in columns)
            params.append(PlaceInfo(place).analyze(self.analyzer))

        worker.perform(self._index_sql(len(places)), params)
62
63
class RankRunner(AbstractPlacexRunner):
    """ Runner for the placex rows of one address rank that still
        need indexing.
    """

    def name(self):
        """ Return the label of this runner, which includes the rank.
        """
        return f"rank {self.rank}"

    def sql_count_objects(self):
        """ Return the SQL that counts the rows of the rank with
            indexed_status > 0.
        """
        stmt = pysql.SQL("""SELECT count(*) FROM placex
                            WHERE rank_address = {} and indexed_status > 0
                         """)
        return stmt.format(pysql.Literal(self.rank))

    def sql_get_objects(self):
        """ Return the SQL that selects the place ids of the rank with
            indexed_status > 0, ordered by geometry_sector.
        """
        condition = pysql.SQL(
            """WHERE indexed_status > 0 and rank_address = {}
               ORDER BY geometry_sector
            """).format(pysql.Literal(self.rank))
        return self.SELECT_SQL + condition
81
82
class BoundaryRunner(AbstractPlacexRunner):
    """ Runner for the administrative boundaries of one search rank
        that still need indexing.
    """

    def name(self):
        """ Return the label of this runner, which includes the rank.
        """
        return f"boundaries rank {self.rank}"

    def sql_count_objects(self):
        """ Return the SQL that counts the administrative boundaries of
            the rank with indexed_status > 0.
        """
        stmt = pysql.SQL("""SELECT count(*) FROM placex
                            WHERE indexed_status > 0
                              AND rank_search = {}
                              AND class = 'boundary' and type = 'administrative'
                         """)
        return stmt.format(pysql.Literal(self.rank))

    def sql_get_objects(self):
        """ Return the SQL that selects the place ids of the administrative
            boundaries of the rank with indexed_status > 0, ordered by
            partition and admin_level.
        """
        condition = pysql.SQL(
            """WHERE indexed_status > 0 and rank_search = {}
                     and class = 'boundary' and type = 'administrative'
               ORDER BY partition, admin_level
            """).format(pysql.Literal(self.rank))
        return self.SELECT_SQL + condition
104
105
class InterpolationRunner:
    """ Runner for indexing the rows of the address interpolation table
        location_property_osmline.
    """

    def __init__(self, analyzer):
        """ Remember the analyzer that is handed to PlaceInfo.analyze()
            for each place.
        """
        self.analyzer = analyzer


    @staticmethod
    def name():
        """ Return the label of this runner.
        """
        return "interpolation lines (location_property_osmline)"

    @staticmethod
    def sql_count_objects():
        """ Return the SQL that counts the rows with indexed_status > 0.
        """
        return """SELECT count(*) FROM location_property_osmline
                  WHERE indexed_status > 0"""

    @staticmethod
    def sql_get_objects():
        """ Return the SQL that selects the place ids of the rows with
            indexed_status > 0, ordered by geometry_sector.
        """
        return """SELECT place_id
                  FROM location_property_osmline
                  WHERE indexed_status > 0
                  ORDER BY geometry_sector"""


    @staticmethod
    def get_place_details(worker, ids):
        """ Send the query for the address details of places to `worker`.

            The place id is read from the first element of each entry
            in `ids`.
        """
        place_ids = tuple(entry[0] for entry in ids)
        worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address
                          FROM location_property_osmline WHERE place_id IN %s""",
                       (place_ids, ))


    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Return the UPDATE statement that writes back `num_places` rows.

            Only the statement built last is kept in the cache.
        """
        stmt = pysql.SQL("""UPDATE location_property_osmline
                            SET indexed_status = 0, address = v.addr, token_info = v.ti
                            FROM (VALUES {}) as v(id, addr, ti)
                            WHERE place_id = v.id
                         """)
        return stmt.format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))


    def index_places(self, worker, places):
        """ Send the UPDATE for all `places` to `worker`.

            Each place contributes its place_id and address followed by
            the result of analyzing the place with the runner's analyzer.
        """
        params = []
        for place in places:
            params.append(place['place_id'])
            params.append(place['address'])
            params.append(PlaceInfo(place).analyze(self.analyzer))

        worker.perform(self._index_sql(len(places)), params)
156
157
158
class PostcodeRunner:
    """ Runner for indexing the rows of the location_postcode table.
    """

    @staticmethod
    def name():
        """ Return the label of this runner.
        """
        return "postcodes (location_postcode)"

    @staticmethod
    def sql_count_objects():
        """ Return the SQL that counts the rows with indexed_status > 0.
        """
        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'

    @staticmethod
    def sql_get_objects():
        """ Return the SQL that selects the place ids of the rows with
            indexed_status > 0, ordered by country_code and postcode.
        """
        return """SELECT place_id FROM location_postcode
                  WHERE indexed_status > 0
                  ORDER BY country_code, postcode"""

    @staticmethod
    def index_places(worker, ids):
        """ Send the UPDATE that resets indexed_status to 0 to `worker`.

            The place id is read from the first element of each entry
            in `ids` and inserted into the statement as a literal.
        """
        id_list = pysql.SQL(',').join(pysql.Literal(row[0]) for row in ids)
        worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
                                    WHERE place_id IN ({})""")
                       .format(id_list))