git.openstreetmap.org Git - nominatim.git/blob - nominatim/indexer/runners.py
Vagrant and CI tests for Ubuntu 22.04
[nominatim.git] / nominatim / indexer / runners.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Mix-ins that provide the actual commands for the indexer for various indexing
9 tasks.
10 """
11 import functools
12
13 from psycopg2 import sql as pysql
14 import psycopg2.extras
15
16 from nominatim.data.place_info import PlaceInfo
17
18 # pylint: disable=C0111
19
def _mk_valuelist(template, num):
    """ Build a composable SQL fragment that repeats `template` `num` times,
        with the repetitions separated by commas.
    """
    row = pysql.SQL(template)
    return pysql.SQL(',').join(row for _ in range(num))
22
def _analyze_place(place, analyzer):
    """ Run `analyzer.process_place()` on `place` wrapped in a PlaceInfo
        and return the result wrapped in psycopg2's Json adapter.
    """
    info = PlaceInfo(place)
    token_info = analyzer.process_place(info)
    return psycopg2.extras.Json(token_info)
25
class AbstractPlacexRunner:
    """ Returns SQL commands for indexing of the placex table.

        Holds the rank and the analyzer used to compute the token info
        and provides the batch update for placex rows.
    """
    SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
    # Placeholders for one updated row: id, name, address, linked_place_id
    # and token info, in the column order expected by _index_sql().
    UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)"

    def __init__(self, rank, analyzer):
        self.rank = rank
        self.analyzer = analyzer


    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Return the UPDATE statement for a batch of `num_places` rows.

            Only the most recently requested batch size is kept in the cache.
        """
        return pysql.SQL(
            """ UPDATE placex
                SET indexed_status = 0, address = v.addr, token_info = v.ti,
                    name = v.name, linked_place_id = v.linked_place_id
                FROM (VALUES {}) as v(id, name, addr, linked_place_id, ti)
                WHERE place_id = v.id
            """).format(_mk_valuelist(AbstractPlacexRunner.UPDATE_LINE, num_places))


    @staticmethod
    def get_place_details(worker, ids):
        """ Hand `worker` the query that selects, for every place id in `ids`,
            the place id together with the output of placex_indexing_prepare().

            `ids` is a sequence of rows with the place id in the first column.
            Nothing is returned here; the query is only passed to
            `worker.perform()`.
        """
        # NOTE(review): an empty `ids` would be rendered as `IN ()`; callers
        # are presumably expected to pass at least one id - confirm.
        worker.perform("""SELECT place_id, extra.*
                          FROM placex, LATERAL placex_indexing_prepare(placex) as extra
                          WHERE place_id IN %s""",
                       (tuple((p[0] for p in ids)), ))


    def index_places(self, worker, places):
        """ Write the analysed data for a batch of places back to placex.

            Each entry of `places` must be subscriptable by 'place_id',
            'name', 'address' and 'linked_place_id'. An empty batch is
            skipped because an empty VALUES list is not valid SQL.
        """
        if not places:
            return

        values = []
        for place in places:
            values.extend(place[field] for field in
                          ('place_id', 'name', 'address', 'linked_place_id'))
            values.append(_analyze_place(place, self.analyzer))

        worker.perform(self._index_sql(len(places)), values)
65
66
class RankRunner(AbstractPlacexRunner):
    """ Provides the SQL queries for indexing the placex rows of a single
        address rank.
    """

    def name(self):
        """ Return a short description naming the rank of this runner.
        """
        return f"rank {self.rank}"

    def sql_count_objects(self):
        """ Return the query counting the rows of this address rank that
            have an indexed_status greater than 0.
        """
        query = pysql.SQL("""SELECT count(*) FROM placex
                            WHERE rank_address = {} and indexed_status > 0
                         """)
        return query.format(pysql.Literal(self.rank))

    def sql_get_objects(self):
        """ Return the query selecting the place ids of this address rank
            that have an indexed_status greater than 0, ordered by
            geometry_sector.
        """
        condition = pysql.SQL(
            """WHERE indexed_status > 0 and rank_address = {}
               ORDER BY geometry_sector
            """).format(pysql.Literal(self.rank))
        return self.SELECT_SQL + condition
84
85
class BoundaryRunner(AbstractPlacexRunner):
    """ Provides the SQL queries for indexing the administrative boundaries
        of a single search rank.
    """

    def name(self):
        """ Return a short description naming the rank of this runner.
        """
        return f"boundaries rank {self.rank}"

    def sql_count_objects(self):
        """ Return the query counting the administrative boundaries of this
            search rank that have an indexed_status greater than 0.
        """
        query = pysql.SQL("""SELECT count(*) FROM placex
                            WHERE indexed_status > 0
                              AND rank_search = {}
                              AND class = 'boundary' and type = 'administrative'
                         """)
        return query.format(pysql.Literal(self.rank))

    def sql_get_objects(self):
        """ Return the query selecting the place ids of the administrative
            boundaries of this search rank that have an indexed_status
            greater than 0, ordered by partition and admin_level.
        """
        condition = pysql.SQL(
            """WHERE indexed_status > 0 and rank_search = {}
                     and class = 'boundary' and type = 'administrative'
               ORDER BY partition, admin_level
            """).format(pysql.Literal(self.rank))
        return self.SELECT_SQL + condition
107
108
class InterpolationRunner:
    """ Returns SQL commands for indexing the address interpolation table
        location_property_osmline.
    """

    def __init__(self, analyzer):
        self.analyzer = analyzer


    @staticmethod
    def name():
        """ Return a human-readable description of this runner.
        """
        return "interpolation lines (location_property_osmline)"

    @staticmethod
    def sql_count_objects():
        """ Return the query counting the interpolation lines that have an
            indexed_status greater than 0.
        """
        return """SELECT count(*) FROM location_property_osmline
                  WHERE indexed_status > 0"""

    @staticmethod
    def sql_get_objects():
        """ Return the query selecting the place ids of the interpolation
            lines that have an indexed_status greater than 0, ordered by
            geometry_sector.
        """
        return """SELECT place_id
                  FROM location_property_osmline
                  WHERE indexed_status > 0
                  ORDER BY geometry_sector"""


    @staticmethod
    def get_place_details(worker, ids):
        """ Hand `worker` the query that selects, for every place id in `ids`,
            the place id and the result of get_interpolation_address().

            `ids` is a sequence of rows with the place id in the first column.
            Nothing is returned here; the query is only passed to
            `worker.perform()`.
        """
        # NOTE(review): an empty `ids` would be rendered as `IN ()`; callers
        # are presumably expected to pass at least one id - confirm.
        worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address
                          FROM location_property_osmline WHERE place_id IN %s""",
                       (tuple((p[0] for p in ids)), ))


    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Return the UPDATE statement for a batch of `num_places` rows.

            Only the most recently requested batch size is kept in the cache.
        """
        return pysql.SQL("""UPDATE location_property_osmline
                            SET indexed_status = 0, address = v.addr, token_info = v.ti
                            FROM (VALUES {}) as v(id, addr, ti)
                            WHERE place_id = v.id
                         """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))


    def index_places(self, worker, places):
        """ Write the analysed data for a batch of interpolation lines back
            to location_property_osmline.

            Each entry of `places` must be subscriptable by 'place_id' and
            'address'. An empty batch is skipped because an empty VALUES
            list is not valid SQL.
        """
        if not places:
            return

        values = []
        for place in places:
            values.extend((place[x] for x in ('place_id', 'address')))
            values.append(_analyze_place(place, self.analyzer))

        worker.perform(self._index_sql(len(places)), values)
159
160
161
class PostcodeRunner:
    """ Provides the SQL commands for indexing the location_postcode table.
    """

    @staticmethod
    def name():
        """ Return a human-readable description of this runner.
        """
        return "postcodes (location_postcode)"

    @staticmethod
    def sql_count_objects():
        """ Return the query counting the postcodes that have an
            indexed_status greater than 0.
        """
        return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'

    @staticmethod
    def sql_get_objects():
        """ Return the query selecting the place ids of the postcodes that
            have an indexed_status greater than 0, ordered by country code
            and postcode.
        """
        return """SELECT place_id FROM location_postcode
                  WHERE indexed_status > 0
                  ORDER BY country_code, postcode"""

    @staticmethod
    def index_places(worker, ids):
        """ Set indexed_status to 0 for the postcodes listed in `ids`.

            `ids` is a sequence of rows with the place id in the first
            column. An empty sequence is skipped because `IN ()` is not
            valid SQL.
        """
        if not ids:
            return

        id_list = pysql.SQL(',').join(pysql.Literal(row[0]) for row in ids)
        worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
                                    WHERE place_id IN ({})""")
                       .format(id_list))
182         worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
183                                     WHERE place_id IN ({})""")
184                        .format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids))))