]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/index.c
postcode/zipcode improvements, finish work on handling extratags
[nominatim.git] / nominatim / index.c
1 /*
2 */
3
4 #include <stdio.h>
5 #include <unistd.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <pthread.h>
10 #include <time.h>
11 #include <stdint.h>
12
13 #include <libpq-fe.h>
14
15 #include "nominatim.h"
16 #include "index.h"
17 #include "export.h"
18 #include "postgresql.h"
19
20 extern int verbose;
21
22 void nominatim_index(int rank_min, int rank_max, int num_threads, const char *conninfo, const char *structuredoutputfile)
23 {
24         struct index_thread_data * thread_data;
25         pthread_mutex_t count_mutex = PTHREAD_MUTEX_INITIALIZER;
26         int tuples, count, sleepcount;
27
28         time_t rankStartTime;
29         int rankTotalTuples;
30         int rankCountTuples;
31         float rankPerSecond;
32
33         PGconn *conn;
34         PGresult * res;
35         PGresult * resSectors;
36         PGresult * resPlaces;
37
38         int rank;
39         int i;
40         int iSector;
41
42     const char *paramValues[2];
43     int         paramLengths[2];
44     int         paramFormats[2];
45     uint32_t    paramRank;
46     uint32_t    paramSector;
47     uint32_t    sector;
48
49     xmlTextWriterPtr writer;
50         pthread_mutex_t writer_mutex = PTHREAD_MUTEX_INITIALIZER;
51
52     Oid pg_prepare_params[2];
53
54     conn = PQconnectdb(conninfo);
55     if (PQstatus(conn) != CONNECTION_OK) {
56         fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn));
57         exit(EXIT_FAILURE);
58     }
59
60     pg_prepare_params[0] = PG_OID_INT4;
61     res = PQprepare(conn, "index_sectors",
62         "select geometry_sector,count(*) from placex where rank_search = $1 and indexed_status > 0 group by geometry_sector order by geometry_sector",
63         1, pg_prepare_params);
64     if (PQresultStatus(res) != PGRES_COMMAND_OK)
65     {
66         fprintf(stderr, "Failed preparing index_sectors: %s\n", PQerrorMessage(conn));
67         exit(EXIT_FAILURE);
68     }
69     PQclear(res);
70
71     pg_prepare_params[0] = PG_OID_INT4;
72     res = PQprepare(conn, "index_nosectors",
73         "select 0::integer,count(*) from placex where rank_search = $1 and indexed_status > 0",
74         1, pg_prepare_params);
75     if (PQresultStatus(res) != PGRES_COMMAND_OK)
76     {
77         fprintf(stderr, "Failed preparing index_sectors: %s\n", PQerrorMessage(conn));
78         exit(EXIT_FAILURE);
79     }
80     PQclear(res);
81
82     pg_prepare_params[0] = PG_OID_INT4;
83     pg_prepare_params[1] = PG_OID_INT4;
84     res = PQprepare(conn, "index_sector_places",
85         "select place_id from placex where rank_search = $1 and geometry_sector = $2 and indexed_status > 0",
86         2, pg_prepare_params);
87     if (PQresultStatus(res) != PGRES_COMMAND_OK)
88     {
89         fprintf(stderr, "Failed preparing index_sector_places: %s\n", PQerrorMessage(conn));
90         exit(EXIT_FAILURE);
91     }
92     PQclear(res);
93
94     pg_prepare_params[0] = PG_OID_INT4;
95     res = PQprepare(conn, "index_nosector_places",
96         "select place_id from placex where rank_search = $1 and indexed_status > 0 order by geometry_sector",
97         1, pg_prepare_params);
98     if (PQresultStatus(res) != PGRES_COMMAND_OK)
99     {
100         fprintf(stderr, "Failed preparing index_nosector_places: %s\n", PQerrorMessage(conn));
101         exit(EXIT_FAILURE);
102     }
103     PQclear(res);
104
105     // Build the data for each thread
106     thread_data = (struct index_thread_data *)malloc(sizeof(struct index_thread_data)*num_threads);
107         for (i = 0; i < num_threads; i++)
108         {
109                 thread_data[i].conn = PQconnectdb(conninfo);
110             if (PQstatus(thread_data[i].conn) != CONNECTION_OK) {
111                 fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(thread_data[i].conn));
112                 exit(EXIT_FAILURE);
113             }
114
115             pg_prepare_params[0] = PG_OID_INT4;
116             res = PQprepare(thread_data[i].conn, "index_placex",
117                 "update placex set indexed_status = 0 where place_id = $1",
118                 1, pg_prepare_params);
119             if (PQresultStatus(res) != PGRES_COMMAND_OK)
120             {
121                 fprintf(stderr, "Failed preparing index_placex: %s\n", PQerrorMessage(conn));
122                 exit(EXIT_FAILURE);
123             }
124             PQclear(res);
125
126                 res = PQexec(thread_data[i].conn, "set enable_seqscan = false");
127                 if (PQresultStatus(res) != PGRES_COMMAND_OK)
128             {
129                 fprintf(stderr, "Failed disabling sequential scan: %s\n", PQerrorMessage(conn));
130                 exit(EXIT_FAILURE);
131             }
132             PQclear(res);
133
134             nominatim_exportCreatePreparedQueries(thread_data[i].conn);
135         }
136
137         // Create the output file
138         writer = NULL;
139         if (structuredoutputfile)
140         {
141                 writer = nominatim_exportXMLStart(structuredoutputfile);
142         }
143
144     fprintf(stderr, "Starting indexing rank (%i to %i) using %i treads\n", rank_min, rank_max, num_threads);
145
146     for (rank = rank_min; rank <= rank_max; rank++)
147     {
148         printf("Starting rank %d\n", rank);
149         rankCountTuples = 0;
150         rankPerSecond = 0;
151
152         paramRank = PGint32(rank);
153         paramValues[0] = (char *)&paramRank;
154         paramLengths[0] = sizeof(paramRank);
155         paramFormats[0] = 1;
156                 if (rank < 16)
157                 resSectors = PQexecPrepared(conn, "index_nosectors", 1, paramValues, paramLengths, paramFormats, 1);
158                 else
159                 resSectors = PQexecPrepared(conn, "index_sectors", 1, paramValues, paramLengths, paramFormats, 1);
160         if (PQresultStatus(resSectors) != PGRES_TUPLES_OK)
161         {
162             fprintf(stderr, "index_sectors: SELECT failed: %s", PQerrorMessage(conn));
163             PQclear(resSectors);
164             exit(EXIT_FAILURE);
165         }
166                 if (PQftype(resSectors, 0) != PG_OID_INT4)
167                 {
168             fprintf(stderr, "Sector value has unexpected type\n");
169             PQclear(resSectors);
170             exit(EXIT_FAILURE);
171                 }
172                 if (PQftype(resSectors, 1) != PG_OID_INT8)
173                 {
174             fprintf(stderr, "Sector value has unexpected type\n");
175             PQclear(resSectors);
176             exit(EXIT_FAILURE);
177                 }
178
179                 rankTotalTuples = 0;
180         for (iSector = 0; iSector < PQntuples(resSectors); iSector++)
181         {
182                 rankTotalTuples += PGint64(*((uint64_t *)PQgetvalue(resSectors, iSector, 1)));
183         }
184
185         rankStartTime = time(0);
186         for (iSector = 0; iSector < PQntuples(resSectors); iSector++)
187         {
188                         sector = PGint32(*((uint32_t *)PQgetvalue(resSectors, iSector, 0)));
189                 //printf("\n Starting sector %d size %ld\n", sector, PGint64(*((uint64_t *)PQgetvalue(resSectors, iSector, 1))));
190
191                         // Get all the place_id's for this sector
192                     paramRank = PGint32(rank);
193                 paramValues[0] = (char *)&paramRank;
194                     paramLengths[0] = sizeof(paramRank);
195                     paramFormats[0] = 1;
196                 paramSector = PGint32(sector);
197                 paramValues[1] = (char *)&paramSector;
198                         paramLengths[1] = sizeof(paramSector);
199                     paramFormats[1] = 1;
200                         if (rank < 16)
201                         resPlaces = PQexecPrepared(conn, "index_nosector_places", 1, paramValues, paramLengths, paramFormats, 1);
202                         else
203                         resPlaces = PQexecPrepared(conn, "index_sector_places", 2, paramValues, paramLengths, paramFormats, 1);
204                 if (PQresultStatus(resPlaces) != PGRES_TUPLES_OK)
205                 {
206                     fprintf(stderr, "index_sector_places: SELECT failed: %s", PQerrorMessage(conn));
207                     PQclear(resPlaces);
208                     exit(EXIT_FAILURE);
209                 }
210                         if (PQftype(resPlaces, 0) != PG_OID_INT4)
211                         {
212                     fprintf(stderr, "Place_id value has unexpected type\n");
213                     PQclear(resPlaces);
214                     exit(EXIT_FAILURE);
215                         }
216
217                         count = 0;
218                         rankPerSecond = 0;
219                         tuples = PQntuples(resPlaces);
220
221                         if (tuples > 0)
222                 {
223                                 // Spawn threads
224                                 for (i = 0; i < num_threads; i++)
225                                 {
226                                         thread_data[i].res = resPlaces;
227                                         thread_data[i].tuples = tuples;
228                                         thread_data[i].count = &count;
229                                         thread_data[i].count_mutex = &count_mutex;
230                                         thread_data[i].writer = writer;
231                                         thread_data[i].writer_mutex = &writer_mutex;
232                                         pthread_create(&thread_data[i].thread, NULL, &nominatim_indexThread, (void *)&thread_data[i]);
233                                 }
234
235                                 // Monitor threads to give user feedback
236                                 sleepcount = 0;
237                                 while(count < tuples)
238                                 {
239                                         usleep(1000);
240
241                                         // Aim for one update per second
242                                         if (sleepcount++ > 500)
243                                         {
244                                                 rankPerSecond = ((float)rankCountTuples + (float)count) / MAX(difftime(time(0), rankStartTime),1);
245                                                 printf("  Done %i in %i @ %f per second - Rank %i ETA (seconds): %f\n", (rankCountTuples + count), (int)(difftime(time(0), rankStartTime)), rankPerSecond, rank, ((float)(rankTotalTuples - (rankCountTuples + count)))/rankPerSecond);
246                                                 sleepcount = 0;
247                                         }
248                                 }
249
250                                 // Wait for everything to finish
251                                 for (i = 0; i < num_threads; i++)
252                                 {
253                                         pthread_join(thread_data[i].thread, NULL);
254                                 }
255
256                                 rankCountTuples += tuples;
257                 }
258
259                         // Finished sector
260                         rankPerSecond = (float)rankCountTuples / MAX(difftime(time(0), rankStartTime),1);
261                         printf("  Done %i in %i @ %f per second - ETA (seconds): %f\n", rankCountTuples, (int)(difftime(time(0), rankStartTime)), rankPerSecond, ((float)(rankTotalTuples - rankCountTuples))/rankPerSecond);
262
263             PQclear(resPlaces);
264
265         }
266         // Finished rank
267                 printf("\r  Done %i in %i @ %f per second - FINISHED                      \n\n", rankCountTuples, (int)(difftime(time(0), rankStartTime)), rankPerSecond);
268
269         PQclear(resSectors);
270     }
271
272     if (writer)
273     {
274         nominatim_exportXMLEnd(writer);
275     }
276 }
277
278 void *nominatim_indexThread(void * thread_data_in)
279 {
280         struct index_thread_data * thread_data = (struct index_thread_data * )thread_data_in;
281
282         PGresult   *res;
283
284     const char *paramValues[1];
285     int         paramLengths[1];
286     int         paramFormats[1];
287     uint32_t    paramPlaceID;
288     uint32_t    place_id;
289         time_t          updateStartTime;
290
291         while(1)
292         {
293                 pthread_mutex_lock( thread_data->count_mutex );
294                 if (*(thread_data->count) >= thread_data->tuples)
295                 {
296                         pthread_mutex_unlock( thread_data->count_mutex );
297                         break;
298                 }
299
300                 place_id = PGint32(*((uint32_t *)PQgetvalue(thread_data->res, *thread_data->count, 0)));
301                 (*thread_data->count)++;
302
303                 pthread_mutex_unlock( thread_data->count_mutex );
304
305                 if (verbose) printf("  Processing place_id %d\n", place_id);
306                 
307                 updateStartTime = time(0);
308                 paramPlaceID = PGint32(place_id);
309         paramValues[0] = (char *)&paramPlaceID;
310         paramLengths[0] = sizeof(paramPlaceID);
311         paramFormats[0] = 1;
312         res = PQexecPrepared(thread_data->conn, "index_placex", 1, paramValues, paramLengths, paramFormats, 1);
313         if (PQresultStatus(res) != PGRES_COMMAND_OK)
314         {
315             fprintf(stderr, "index_placex: UPDATE failed: %s", PQerrorMessage(thread_data->conn));
316             PQclear(res);
317             exit(EXIT_FAILURE);
318         }
319         PQclear(res);
320                 if (difftime(time(0), updateStartTime) > 1) printf("  Slow place_id %d\n", place_id);
321
322         if (thread_data->writer)
323         {
324                 nominatim_exportPlace(place_id, thread_data->conn, thread_data->writer, thread_data->writer_mutex);
325         }
326         }
327
328         return NULL;
329 }