git.openstreetmap.org Git - nominatim.git/blob - nominatim/indexer/runners.py
Merge pull request #2458 from lonvia/add-tokenizer-preprocessing
[nominatim.git] / nominatim / indexer / runners.py
1 """
2 Mix-ins that provide the actual commands for the indexer for various indexing
3 tasks.
4 """
5 import functools
6
7 from psycopg2 import sql as pysql
8
9 from nominatim.indexer.place_info import PlaceInfo
10
11 # pylint: disable=C0111
12
def _mk_valuelist(template, num):
    """ Create a composable SQL fragment that consists of `num` copies
        of the SQL snippet `template`, separated by commas.
    """
    single_row = pysql.SQL(template)
    return pysql.SQL(',').join(single_row for _ in range(num))
15
16
class AbstractPlacexRunner:
    """ Shared base for runners that index rows of the placex table.

        Provides the SQL for requesting place details and for writing
        the computed values back into placex.
    """
    SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
    UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)"

    def __init__(self, rank, analyzer):
        self.rank = rank
        self.analyzer = analyzer

    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Return the UPDATE statement that writes back `num_places`
            rows in one go. Only the statement for the most recently
            requested batch size is kept in the cache.
        """
        stmt = pysql.SQL(
            """ UPDATE placex
                SET indexed_status = 0, address = v.addr, token_info = v.ti,
                    name = v.name, linked_place_id = v.linked_place_id
                FROM (VALUES {}) as v(id, name, addr, linked_place_id, ti)
                WHERE place_id = v.id
            """)
        # One placeholder tuple per place that is to be updated.
        value_rows = _mk_valuelist(AbstractPlacexRunner.UPDATE_LINE, num_places)
        return stmt.format(value_rows)

    @staticmethod
    def get_place_details(worker, ids):
        """ Let `worker` execute the query that selects the place_id and
            the output of placex_indexing_prepare() for the given places.
            The place id is taken from the first element of each entry
            in `ids`.
        """
        place_ids = tuple(row[0] for row in ids)
        worker.perform("""SELECT place_id, (placex_indexing_prepare(placex)).*
                          FROM placex WHERE place_id IN %s""",
                       (place_ids, ))

    def index_places(self, worker, places):
        """ Let `worker` execute a single UPDATE covering all `places`.

            Each place must be subscriptable with the keys 'place_id',
            'name', 'address' and 'linked_place_id'. The token info is
            obtained by running the analyzer over the place via PlaceInfo.
        """
        columns = ('place_id', 'name', 'address', 'linked_place_id')
        params = []
        for row in places:
            # Parameter order must follow the placeholders in UPDATE_LINE.
            params.extend(row[col] for col in columns)
            params.append(PlaceInfo(row).analyze(self.analyzer))

        worker.perform(self._index_sql(len(places)), params)
55
56
class RankRunner(AbstractPlacexRunner):
    """ Runner that indexes the placex rows of one address rank.
    """

    def name(self):
        """ Return a printable label for this runner.
        """
        label = "rank {}"
        return label.format(self.rank)

    def sql_count_objects(self):
        """ Return the query counting the rows with this address rank
            that have an indexed_status greater than zero.
        """
        stmt = pysql.SQL("""SELECT count(*) FROM placex
                            WHERE rank_address = {} and indexed_status > 0
                         """)
        return stmt.format(pysql.Literal(self.rank))

    def sql_get_objects(self):
        """ Return the query selecting the place ids with this address
            rank that have an indexed_status greater than zero, ordered
            by geometry_sector.
        """
        condition = pysql.SQL(
            """WHERE indexed_status > 0 and rank_address = {}
               ORDER BY geometry_sector
            """).format(pysql.Literal(self.rank))
        return self.SELECT_SQL + condition
74
75
class BoundaryRunner(AbstractPlacexRunner):
    """ Runner that indexes the administrative boundaries of one
        search rank in the placex table.
    """

    def name(self):
        """ Return a printable label for this runner.
        """
        label = "boundaries rank {}"
        return label.format(self.rank)

    def sql_count_objects(self):
        """ Return the query counting the administrative boundaries with
            this search rank that have an indexed_status greater than zero.
        """
        stmt = pysql.SQL("""SELECT count(*) FROM placex
                            WHERE indexed_status > 0
                              AND rank_search = {}
                              AND class = 'boundary' and type = 'administrative'
                         """)
        return stmt.format(pysql.Literal(self.rank))

    def sql_get_objects(self):
        """ Return the query selecting the place ids of administrative
            boundaries with this search rank that have an indexed_status
            greater than zero, ordered by partition and admin_level.
        """
        condition = pysql.SQL(
            """WHERE indexed_status > 0 and rank_search = {}
                     and class = 'boundary' and type = 'administrative'
               ORDER BY partition, admin_level
            """).format(pysql.Literal(self.rank))
        return self.SELECT_SQL + condition
97
98
class InterpolationRunner:
    """ Runner that indexes the address interpolation lines stored in
        the table location_property_osmline.
    """

    def __init__(self, analyzer):
        self.analyzer = analyzer

    @staticmethod
    def name():
        """ Return a printable label for this runner.
        """
        label = "interpolation lines (location_property_osmline)"
        return label

    @staticmethod
    def sql_count_objects():
        """ Return the query counting the rows that have an
            indexed_status greater than zero.
        """
        stmt = """SELECT count(*) FROM location_property_osmline
                  WHERE indexed_status > 0"""
        return stmt

    @staticmethod
    def sql_get_objects():
        """ Return the query selecting the place ids of rows that have an
            indexed_status greater than zero, ordered by geometry_sector.
        """
        stmt = """SELECT place_id
                  FROM location_property_osmline
                  WHERE indexed_status > 0
                  ORDER BY geometry_sector"""
        return stmt

    @staticmethod
    def get_place_details(worker, ids):
        """ Let `worker` execute the query that selects the place_id and
            the output of get_interpolation_address() for the given
            lines. The place id is taken from the first element of each
            entry in `ids`.
        """
        place_ids = tuple(row[0] for row in ids)
        worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address
                          FROM location_property_osmline WHERE place_id IN %s""",
                       (place_ids, ))

    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Return the UPDATE statement that writes back `num_places`
            rows in one go. Only the statement for the most recently
            requested batch size is kept in the cache.
        """
        stmt = pysql.SQL("""UPDATE location_property_osmline
                            SET indexed_status = 0, address = v.addr, token_info = v.ti
                            FROM (VALUES {}) as v(id, addr, ti)
                            WHERE place_id = v.id
                         """)
        # One placeholder tuple per line that is to be updated.
        value_rows = _mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)
        return stmt.format(value_rows)

    def index_places(self, worker, places):
        """ Let `worker` execute a single UPDATE covering all `places`.

            Each place must be subscriptable with the keys 'place_id' and
            'address'. The token info is obtained by running the analyzer
            over the place via PlaceInfo.
        """
        params = []
        for row in places:
            # Parameter order must follow the (id, addr, ti) placeholders.
            params.append(row['place_id'])
            params.append(row['address'])
            params.append(PlaceInfo(row).analyze(self.analyzer))

        worker.perform(self._index_sql(len(places)), params)
149
150
151
class PostcodeRunner:
    """ Runner that indexes the rows of the location_postcode table.
    """

    @staticmethod
    def name():
        """ Return a printable label for this runner.
        """
        label = "postcodes (location_postcode)"
        return label

    @staticmethod
    def sql_count_objects():
        """ Return the query counting the rows that have an
            indexed_status greater than zero.
        """
        stmt = 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
        return stmt

    @staticmethod
    def sql_get_objects():
        """ Return the query selecting the place ids of rows that have an
            indexed_status greater than zero, ordered by country code
            and postcode.
        """
        stmt = """SELECT place_id FROM location_postcode
                  WHERE indexed_status > 0
                  ORDER BY country_code, postcode"""
        return stmt

    @staticmethod
    def index_places(worker, ids):
        """ Let `worker` execute an UPDATE that sets indexed_status to 0
            for the given postcodes. The place id is taken from the first
            element of each entry in `ids` and inlined as an SQL literal.
        """
        stmt = pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
                                    WHERE place_id IN ({})""")
        id_list = pysql.SQL(',').join(pysql.Literal(row[0]) for row in ids)
        worker.perform(stmt.format(id_list))