git.openstreetmap.org Git - nominatim.git/blob - nominatim/indexer/runners.py
extract address tokens in tokenizer
[nominatim.git] / nominatim / indexer / runners.py
1 """
2 Mix-ins that provide the actual commands for the indexer for various indexing
3 tasks.
4 """
5 import functools
6
7 import psycopg2.extras
8
9 # pylint: disable=C0111
10
class AbstractPlacexRunner:
    """ Common base for runners that index rows of the placex table.

        Stores the rank and the analyzer and implements the batched
        UPDATE. SELECT_SQL is the statement prefix that subclasses
        extend with their own WHERE/ORDER BY clauses.
    """
    SELECT_SQL = 'SELECT place_id, (placex_prepare_update(placex)).* FROM placex'

    def __init__(self, rank, analyzer):
        self.analyzer = analyzer
        self.rank = rank

    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Build the UPDATE statement for a batch of `num_places` rows.

            Every row contributes one (id, address, token info)
            placeholder triple. Only the most recently built statement
            is kept in the cache.
        """
        row_slots = ','.join("(%s, %s::hstore, %s::jsonb)" for _ in range(num_places))
        return """ UPDATE placex
                   SET indexed_status = 0, address = v.addr, token_info = v.ti
                   FROM (VALUES {}) as v(id, addr, ti)
                   WHERE place_id = v.id
               """.format(row_slots)

    def index_places(self, worker, places):
        """ Index all rows in `places` with a single UPDATE sent
            through `worker`.

            For each row the parameters are its 'place_id', its
            'address' and the result of the analyzer's process_place()
            wrapped as a JSON value, in that order.
        """
        params = []
        for row in places:
            params.append(row['place_id'])
            params.append(row['address'])
            params.append(psycopg2.extras.Json(self.analyzer.process_place(row)))

        statement = self._index_sql(len(places))
        worker.perform(statement, params)
38
39
class RankRunner(AbstractPlacexRunner):
    """ Runner for the placex rows of one address rank.
    """

    def name(self):
        """ Return the label for this task, including the rank.
        """
        label = "rank {}"
        return label.format(self.rank)

    def sql_count_objects(self):
        """ Return the SQL counting the unindexed rows with this
            address rank.
        """
        rank = self.rank
        return """SELECT count(*) FROM placex
                  WHERE rank_address = {} and indexed_status > 0
               """.format(rank)

    def sql_get_objects(self):
        """ Return the SQL selecting the unindexed rows with this
            address rank, ordered by geometry sector.
        """
        select_part, rank = self.SELECT_SQL, self.rank
        return """{} WHERE indexed_status > 0 and rank_address = {}
                     ORDER BY geometry_sector
               """.format(select_part, rank)
56
57
class BoundaryRunner(AbstractPlacexRunner):
    """ Runner for the administrative boundaries in placex that have
        a given search rank.
    """

    def name(self):
        """ Return the label for this task, including the rank.
        """
        label = "boundaries rank {}"
        return label.format(self.rank)

    def sql_count_objects(self):
        """ Return the SQL counting the unindexed administrative
            boundaries with this search rank.
        """
        rank = self.rank
        return """SELECT count(*) FROM placex
                  WHERE indexed_status > 0
                    AND rank_search = {}
                    AND class = 'boundary' and type = 'administrative'
               """.format(rank)

    def sql_get_objects(self):
        """ Return the SQL selecting the unindexed administrative
            boundaries with this search rank, ordered by partition
            and admin level.
        """
        select_part, rank = self.SELECT_SQL, self.rank
        return """{} WHERE indexed_status > 0 and rank_search = {}
                           and class = 'boundary' and type = 'administrative'
                     ORDER BY partition, admin_level
               """.format(select_part, rank)
78
79
class InterpolationRunner:
    """ Runner for the address interpolation lines stored in the
        table location_property_osmline.
    """

    def __init__(self, analyzer):
        self.analyzer = analyzer

    @staticmethod
    def name():
        """ Return the label for this task.
        """
        label = "interpolation lines (location_property_osmline)"
        return label

    @staticmethod
    def sql_count_objects():
        """ Return the SQL counting the unindexed interpolation lines.
        """
        query = """SELECT count(*) FROM location_property_osmline
                  WHERE indexed_status > 0"""
        return query

    @staticmethod
    def sql_get_objects():
        """ Return the SQL selecting place id and interpolation address
            of the unindexed lines, ordered by geometry sector.
        """
        query = """SELECT place_id, get_interpolation_address(address, osm_id) as address
                  FROM location_property_osmline
                  WHERE indexed_status > 0
                  ORDER BY geometry_sector"""
        return query

    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _index_sql(num_places):
        """ Build the UPDATE statement for a batch of `num_places` rows.

            Every row contributes one (id, address, token info)
            placeholder triple. Only the most recently built statement
            is kept in the cache.
        """
        row_slots = ','.join("(%s, %s::hstore, %s::jsonb)" for _ in range(num_places))
        return """ UPDATE location_property_osmline
                   SET indexed_status = 0, address = v.addr, token_info = v.ti
                   FROM (VALUES {}) as v(id, addr, ti)
                   WHERE place_id = v.id
               """.format(row_slots)

    def index_places(self, worker, places):
        """ Index all rows in `places` with a single UPDATE sent
            through `worker`.

            For each row the parameters are its 'place_id', its
            'address' and the result of the analyzer's process_place()
            wrapped as a JSON value, in that order.
        """
        params = []
        for row in places:
            params.append(row['place_id'])
            params.append(row['address'])
            params.append(psycopg2.extras.Json(self.analyzer.process_place(row)))

        statement = self._index_sql(len(places))
        worker.perform(statement, params)
123
124
125
class PostcodeRunner:
    """ Runner for the rows of the location_postcode table.
    """

    @staticmethod
    def name():
        """ Return the label for this task.
        """
        label = "postcodes (location_postcode)"
        return label

    @staticmethod
    def sql_count_objects():
        """ Return the SQL counting the unindexed postcodes.
        """
        query = 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0'
        return query

    @staticmethod
    def sql_get_objects():
        """ Return the SQL selecting the place ids of the unindexed
            postcodes, ordered by country code and postcode.
        """
        query = """SELECT place_id FROM location_postcode
                  WHERE indexed_status > 0
                  ORDER BY country_code, postcode"""
        return query

    @staticmethod
    def index_places(worker, ids):
        """ Mark the given postcodes as indexed with a single UPDATE
            sent through `worker`.

            `ids` is a sequence of rows; the first element of each row
            is the place id and is written into the statement text.
        """
        id_list = ','.join([str(row[0]) for row in ids])
        worker.perform(""" UPDATE location_postcode SET indexed_status = 0
                           WHERE place_id IN ({})
                       """.format(id_list))