git.openstreetmap.org Git - nominatim.git/blob - nominatim/export.c
delete more hardcoded rules for special phrases
[nominatim.git] / nominatim / export.c
1 /*
2 */
3
4 #include <stdio.h>
5 #include <unistd.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <time.h>
10 #include <stdint.h>
11 #include <pthread.h>
12
13 #include <libpq-fe.h>
14
15 #include "nominatim.h"
16 #include "export.h"
17 #include "postgresql.h"
18
/* Verbosity flag; only declared here, its definition is not part of the visible source. */
19 extern int verbose;
20
/*
 * Grouping element currently open under <osmStructured>, as used by
 * nominatim_exportStartMode()/nominatim_exportEndMode():
 *   0 = none, 1 = <add>, 2 = <update>, 3 = <delete>.
 * This is a single file-level variable, so it describes one writer at a time.
 */
21 int mode = 0;
22
23 void nominatim_export(int rank_min, int rank_max, const char *conninfo, const char *structuredoutputfile)
24 {
25     xmlTextWriterPtr writer;
26
27     int rankTotalDone;
28
29     PGconn *conn;
30     PGresult * res;
31     PGresult * resSectors;
32     PGresult * resPlaces;
33
34     int rank;
35     int i;
36     int iSector;
37     int tuples;
38
39     const char *paramValues[2];
40     int         paramLengths[2];
41     int         paramFormats[2];
42     uint32_t    paramRank;
43     uint32_t    paramSector;
44     uint32_t    sector;
45
46     Oid pg_prepare_params[2];
47
48     conn = PQconnectdb(conninfo);
49     if (PQstatus(conn) != CONNECTION_OK)
50     {
51         fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn));
52         exit(EXIT_FAILURE);
53     }
54
55     pg_prepare_params[0] = PG_OID_INT4;
56     res = PQprepare(conn, "index_sectors",
57                     "select geometry_sector,count(*) from placex where rank_search = $1 and indexed_status = 0 group by geometry_sector order by geometry_sector",
58                     1, pg_prepare_params);
59     if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE);
60     PQclear(res);
61
62     pg_prepare_params[0] = PG_OID_INT4;
63     pg_prepare_params[1] = PG_OID_INT4;
64     res = PQprepare(conn, "index_sector_places",
65                     "select place_id from placex where rank_search = $1 and geometry_sector = $2",
66                     2, pg_prepare_params);
67     if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE);
68     PQclear(res);
69
70     nominatim_exportCreatePreparedQueries(conn);
71
72     // Create the output file
73     writer = nominatim_exportXMLStart(structuredoutputfile);
74
75     for (rank = rank_min; rank <= rank_max; rank++)
76     {
77         printf("Starting rank %d\n", rank);
78
79         paramRank = PGint32(rank);
80         paramValues[0] = (char *)&paramRank;
81         paramLengths[0] = sizeof(paramRank);
82         paramFormats[0] = 1;
83         resSectors = PQexecPrepared(conn, "index_sectors", 1, paramValues, paramLengths, paramFormats, 1);
84         if (PQresultStatus(resSectors) != PGRES_TUPLES_OK)
85         {
86             fprintf(stderr, "index_sectors: SELECT failed: %s", PQerrorMessage(conn));
87             PQclear(resSectors);
88             exit(EXIT_FAILURE);
89         }
90         if (PQftype(resSectors, 0) != PG_OID_INT4)
91         {
92             fprintf(stderr, "Sector value has unexpected type\n");
93             PQclear(resSectors);
94             exit(EXIT_FAILURE);
95         }
96         if (PQftype(resSectors, 1) != PG_OID_INT8)
97         {
98             fprintf(stderr, "Sector value has unexpected type\n");
99             PQclear(resSectors);
100             exit(EXIT_FAILURE);
101         }
102
103         rankTotalDone = 0;
104         for (iSector = 0; iSector < PQntuples(resSectors); iSector++)
105         {
106             sector = PGint32(*((uint32_t *)PQgetvalue(resSectors, iSector, 0)));
107
108             // Get all the place_id's for this sector
109             paramRank = PGint32(rank);
110             paramValues[0] = (char *)&paramRank;
111             paramLengths[0] = sizeof(paramRank);
112             paramFormats[0] = 1;
113             paramSector = PGint32(sector);
114             paramValues[1] = (char *)&paramSector;
115             paramLengths[1] = sizeof(paramSector);
116             paramFormats[1] = 1;
117             resPlaces = PQexecPrepared(conn, "index_sector_places", 2, paramValues, paramLengths, paramFormats, 1);
118             if (PQresultStatus(resPlaces) != PGRES_TUPLES_OK)
119             {
120                 fprintf(stderr, "index_sector_places: SELECT failed: %s", PQerrorMessage(conn));
121                 PQclear(resPlaces);
122                 exit(EXIT_FAILURE);
123             }
124             if (PQftype(resPlaces, 0) != PG_OID_INT8)
125             {
126                 fprintf(stderr, "Place_id value has unexpected type\n");
127                 PQclear(resPlaces);
128                 exit(EXIT_FAILURE);
129             }
130
131             tuples = PQntuples(resPlaces);
132             for (i = 0; i < tuples; i++)
133             {
134                 nominatim_exportPlace(PGint64(*((uint64_t *)PQgetvalue(resPlaces, i, 0))), conn, writer, NULL, NULL);
135                 rankTotalDone++;
136                 if (rankTotalDone%1000 == 0) printf("Done %i (k)\n", rankTotalDone/1000);
137             }
138             PQclear(resPlaces);
139         }
140         PQclear(resSectors);
141     }
142
143     nominatim_exportXMLEnd(writer);
144
145     PQfinish(conn);
146 }
147
148 void nominatim_exportCreatePreparedQueries(PGconn * conn)
149 {
150     Oid pg_prepare_params[2];
151     PGresult * res;
152
153     pg_prepare_params[0] = PG_OID_INT8;
154     res = PQprepare(conn, "placex_details",
155                     "select placex.osm_type, placex.osm_id, placex.class, placex.type, placex.name, placex.housenumber, placex.country_code, ST_AsText(placex.geometry), placex.admin_level, placex.rank_address, placex.rank_search, placex.parent_place_id, parent.osm_type, parent.osm_id, placex.indexed_status from placex left outer join placex as parent on (placex.parent_place_id = parent.place_id) where placex.place_id = $1",
156                     1, pg_prepare_params);
157     if (PQresultStatus(res) != PGRES_COMMAND_OK)
158     {
159         fprintf(stderr, "Error preparing placex_details: %s", PQerrorMessage(conn));
160         exit(EXIT_FAILURE);
161     }
162     PQclear(res);
163
164     pg_prepare_params[0] = PG_OID_INT8;
165     res = PQprepare(conn, "placex_address",
166                     "select osm_type,osm_id,class,type,distance,cached_rank_address,isaddress from place_addressline join placex on (address_place_id = placex.place_id) where place_addressline.place_id = $1 and address_place_id != place_addressline.place_id order by cached_rank_address asc,osm_type,osm_id",
167                     1, pg_prepare_params);
168     if (PQresultStatus(res) != PGRES_COMMAND_OK)
169     {
170         fprintf(stderr, "Error preparing placex_address: %s", PQerrorMessage(conn));
171         exit(EXIT_FAILURE);
172     }
173     PQclear(res);
174
175     pg_prepare_params[0] = PG_OID_INT8;
176     res = PQprepare(conn, "placex_names",
177                     "select (each(name)).key,(each(name)).value from (select name from placex where place_id = $1) as x order by (each(name)).key",
178                     1, pg_prepare_params);
179     if (PQresultStatus(res) != PGRES_COMMAND_OK)
180     {
181         fprintf(stderr, "Error preparing placex_names: %s", PQerrorMessage(conn));
182         exit(EXIT_FAILURE);
183     }
184     PQclear(res);
185
186     pg_prepare_params[0] = PG_OID_INT8;
187     res = PQprepare(conn, "placex_extratags",
188                     "select (each(extratags)).key,(each(extratags)).value from (select extratags from placex where place_id = $1) as x order by (each(extratags)).key",
189                     1, pg_prepare_params);
190     if (PQresultStatus(res) != PGRES_COMMAND_OK)
191     {
192         fprintf(stderr, "Error preparing placex_extratags: %s", PQerrorMessage(conn));
193         exit(EXIT_FAILURE);
194     }
195     PQclear(res);
196 }
197
198 xmlTextWriterPtr nominatim_exportXMLStart(const char *structuredoutputfile)
199 {
200     xmlTextWriterPtr writer;
201
202     writer = xmlNewTextWriterFilename(structuredoutputfile, 0);
203     if (writer==NULL)
204     {
205         fprintf(stderr, "Unable to open %s\n", structuredoutputfile);
206         exit(EXIT_FAILURE);
207     }
208     xmlTextWriterSetIndent(writer, 1);
209     if (xmlTextWriterStartDocument(writer, NULL, "UTF8", NULL) < 0)
210     {
211         fprintf(stderr, "xmlTextWriterStartDocument failed\n");
212         exit(EXIT_FAILURE);
213     }
214     if (xmlTextWriterStartElement(writer, BAD_CAST "osmStructured") < 0)
215     {
216         fprintf(stderr, "xmlTextWriterStartElement failed\n");
217         exit(EXIT_FAILURE);
218     }
219     if (xmlTextWriterWriteAttribute(writer, BAD_CAST "version", BAD_CAST "0.1") < 0)
220     {
221         fprintf(stderr, "xmlTextWriterWriteAttribute failed\n");
222         exit(EXIT_FAILURE);
223     }
224     if (xmlTextWriterWriteAttribute(writer, BAD_CAST "generator", BAD_CAST "Nominatim") < 0)
225     {
226         fprintf(stderr, "xmlTextWriterWriteAttribute failed\n");
227         exit(EXIT_FAILURE);
228     }
229
230     mode = 0;
231
232     return writer;
233 }
234
235 void nominatim_exportXMLEnd(xmlTextWriterPtr writer)
236 {
237     nominatim_exportEndMode(writer);
238
239     // End <osmStructured>
240     if (xmlTextWriterEndElement(writer) < 0)
241     {
242         fprintf(stderr, "xmlTextWriterEndElement failed\n");
243         exit(EXIT_FAILURE);
244     }
245     if (xmlTextWriterEndDocument(writer) < 0)
246     {
247         fprintf(stderr, "xmlTextWriterEndDocument failed\n");
248         exit(EXIT_FAILURE);
249     }
250     xmlFreeTextWriter(writer);
251 }
252
253 void nominatim_exportStartMode(xmlTextWriterPtr writer, int newMode)
254 {
255     if (mode == newMode) return;
256
257     nominatim_exportEndMode(writer);
258
259     switch(newMode)
260     {
261     case 0:
262         break;
263
264     case 1:
265         if (xmlTextWriterStartElement(writer, BAD_CAST "add") < 0)
266         {
267             fprintf(stderr, "xmlTextWriterStartElement failed\n");
268             exit(EXIT_FAILURE);
269         }
270         break;
271
272     case 2:
273         if (xmlTextWriterStartElement(writer, BAD_CAST "update") < 0)
274         {
275             fprintf(stderr, "xmlTextWriterStartElement failed\n");
276             exit(EXIT_FAILURE);
277         }
278         break;
279
280     case 3:
281         if (xmlTextWriterStartElement(writer, BAD_CAST "delete") < 0)
282         {
283             fprintf(stderr, "xmlTextWriterStartElement failed\n");
284             exit(EXIT_FAILURE);
285         }
286         break;
287     }
288     mode = newMode;
289 }
290
291 void nominatim_exportEndMode(xmlTextWriterPtr writer)
292 {
293     if (!mode) return;
294
295     if (xmlTextWriterEndElement(writer) < 0)
296     {
297         fprintf(stderr, "xmlTextWriterEndElement failed\n");
298         exit(EXIT_FAILURE);
299     }
300 }
301
302 void nominatim_exportPlaceQueries(uint64_t place_id, PGconn * conn, struct export_data * querySet)
303 {
304     const char *        paramValues[1];
305     int                 paramLengths[1];
306     int                 paramFormats[1];
307     uint64_t            paramPlaceID;
308
309     paramPlaceID = PGint64(place_id);
310     paramValues[0] = (char *)&paramPlaceID;
311     paramLengths[0] = sizeof(paramPlaceID);
312     paramFormats[0] = 1;
313
314     querySet->res = PQexecPrepared(conn, "placex_details", 1, paramValues, paramLengths, paramFormats, 0);
315     if (PQresultStatus(querySet->res) != PGRES_TUPLES_OK)
316     {
317         fprintf(stderr, "placex_details: SELECT failed: %s", PQerrorMessage(conn));
318         PQclear(querySet->res);
319         exit(EXIT_FAILURE);
320     }
321
322     querySet->resNames = PQexecPrepared(conn, "placex_names", 1, paramValues, paramLengths, paramFormats, 0);
323     if (PQresultStatus(querySet->resNames) != PGRES_TUPLES_OK)
324     {
325         fprintf(stderr, "placex_names: SELECT failed: %s", PQerrorMessage(conn));
326         PQclear(querySet->resNames);
327         exit(EXIT_FAILURE);
328     }
329
330     querySet->resAddress = PQexecPrepared(conn, "placex_address", 1, paramValues, paramLengths, paramFormats, 0);
331     if (PQresultStatus(querySet->resAddress) != PGRES_TUPLES_OK)
332     {
333         fprintf(stderr, "placex_address: SELECT failed: %s", PQerrorMessage(conn));
334         PQclear(querySet->resAddress);
335         exit(EXIT_FAILURE);
336     }
337
338     querySet->resExtraTags = PQexecPrepared(conn, "placex_extratags", 1, paramValues, paramLengths, paramFormats, 0);
339     if (PQresultStatus(querySet->resExtraTags) != PGRES_TUPLES_OK)
340     {
341         fprintf(stderr, "placex_extratags: SELECT failed: %s", PQerrorMessage(conn));
342         PQclear(querySet->resExtraTags);
343         exit(EXIT_FAILURE);
344     }
345 }
346
347 void nominatim_exportFreeQueries(struct export_data * querySet)
348 {
349     PQclear(querySet->res);
350     PQclear(querySet->resNames);
351     PQclear(querySet->resAddress);
352     PQclear(querySet->resExtraTags);
353 }
354
355 /*
356  * Requirements: the prepared queries must exist
357  */
358 void nominatim_exportPlace(uint64_t place_id, PGconn * conn, 
359   xmlTextWriterPtr writer, pthread_mutex_t * writer_mutex, struct export_data * prevQuerySet)
360 {
361     struct export_data          querySet;
362
363     int                         i;
364
365     nominatim_exportPlaceQueries(place_id, conn, &querySet);
366
367     // Add, modify or delete?
368     if (prevQuerySet)
369     {
370         if ((PQgetvalue(prevQuerySet->res, 0, 14) && strcmp(PQgetvalue(prevQuerySet->res, 0, 14), "100") == 0) || PQntuples(querySet.res) == 0)
371         {
372             // Delete
373             if (writer_mutex) pthread_mutex_lock( writer_mutex );
374             nominatim_exportStartMode(writer, 3);
375             xmlTextWriterStartElement(writer, BAD_CAST "feature");
376             xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "place_id", "%li", place_id);
377             xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(prevQuerySet->res, 0, 0));
378             xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST PQgetvalue(prevQuerySet->res, 0, 1));
379             xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST PQgetvalue(prevQuerySet->res, 0, 2));
380             xmlTextWriterWriteAttribute(writer, BAD_CAST "value", BAD_CAST PQgetvalue(prevQuerySet->res, 0, 3));
381             xmlTextWriterEndElement(writer);
382             if (writer_mutex) pthread_mutex_unlock( writer_mutex );
383             nominatim_exportFreeQueries(&querySet);
384             return;
385         }
386         if (PQgetvalue(prevQuerySet->res, 0, 14) && strcmp(PQgetvalue(prevQuerySet->res, 0, 14), "1") == 0)
387         {
388             // Add
389             if (writer_mutex) pthread_mutex_lock( writer_mutex );
390             nominatim_exportStartMode(writer, 1);  
391         }
392         else
393         {
394             // Update, but only if something has changed
395
396             // TODO: detect changes
397
398             if (writer_mutex) pthread_mutex_lock( writer_mutex );
399             nominatim_exportStartMode(writer, 2);  
400         }
401     }
402     else
403     {
404        // Add
405        if (writer_mutex) pthread_mutex_lock( writer_mutex );
406        nominatim_exportStartMode(writer, 1);  
407     }
408
409     xmlTextWriterStartElement(writer, BAD_CAST "feature");
410     xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "place_id", "%li", place_id);
411     xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(querySet.res, 0, 0));
412     xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST PQgetvalue(querySet.res, 0, 1));
413     xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST PQgetvalue(querySet.res, 0, 2));
414     xmlTextWriterWriteAttribute(writer, BAD_CAST "value", BAD_CAST PQgetvalue(querySet.res, 0, 3));
415     xmlTextWriterWriteAttribute(writer, BAD_CAST "rank", BAD_CAST PQgetvalue(querySet.res, 0, 9));
416     xmlTextWriterWriteAttribute(writer, BAD_CAST "importance", BAD_CAST PQgetvalue(querySet.res, 0, 10));
417     xmlTextWriterWriteAttribute(writer, BAD_CAST "parent_place_id", BAD_CAST PQgetvalue(querySet.res, 0, 11));
418     xmlTextWriterWriteAttribute(writer, BAD_CAST "parent_type", BAD_CAST PQgetvalue(querySet.res, 0, 12));
419     xmlTextWriterWriteAttribute(writer, BAD_CAST "parent_id", BAD_CAST PQgetvalue(querySet.res, 0, 13));
420
421     if (PQntuples(querySet.resNames))
422     {
423         xmlTextWriterStartElement(writer, BAD_CAST "names");
424
425         for (i = 0; i < PQntuples(querySet.resNames); i++)
426         {
427             xmlTextWriterStartElement(writer, BAD_CAST "name");
428             xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(querySet.resNames, i, 0));
429             xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.resNames, i, 1));
430             xmlTextWriterEndElement(writer);
431         }
432
433         xmlTextWriterEndElement(writer);
434     }
435
436     if (PQgetvalue(querySet.res, 0, 5) && strlen(PQgetvalue(querySet.res, 0, 5)))
437     {
438         xmlTextWriterStartElement(writer, BAD_CAST "houseNumber");
439         xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.res, 0, 5));
440         xmlTextWriterEndElement(writer);
441     }
442
443     if (PQgetvalue(querySet.res, 0, 8) && strlen(PQgetvalue(querySet.res, 0, 8)))
444     {
445         xmlTextWriterStartElement(writer, BAD_CAST "adminLevel");
446         xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.res, 0, 8));
447         xmlTextWriterEndElement(writer);
448     }
449
450     if (PQgetvalue(querySet.res, 0, 6) && strlen(PQgetvalue(querySet.res, 0, 6)))
451     {
452         xmlTextWriterStartElement(writer, BAD_CAST "countryCode");
453         xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.res, 0, 6));
454         xmlTextWriterEndElement(writer);
455     }
456
457     if (PQntuples(querySet.resAddress) > 0)
458     {
459         xmlTextWriterStartElement(writer, BAD_CAST "address");
460         for (i = 0; i < PQntuples(querySet.resAddress); i++)
461         {
462             xmlTextWriterStartElement(writer, BAD_CAST getRankLabel(atoi(PQgetvalue(querySet.resAddress, i, 5))));
463             xmlTextWriterWriteAttribute(writer, BAD_CAST "rank", BAD_CAST PQgetvalue(querySet.resAddress, i, 5));
464             xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(querySet.resAddress, i, 0));
465             xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST PQgetvalue(querySet.resAddress, i, 1));
466             xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST PQgetvalue(querySet.resAddress, i, 2));
467             xmlTextWriterWriteAttribute(writer, BAD_CAST "value", BAD_CAST PQgetvalue(querySet.resAddress, i, 3));
468             xmlTextWriterWriteAttribute(writer, BAD_CAST "distance", BAD_CAST PQgetvalue(querySet.resAddress, i, 4));
469             xmlTextWriterWriteAttribute(writer, BAD_CAST "isaddress", BAD_CAST PQgetvalue(querySet.resAddress, i, 6));
470             xmlTextWriterEndElement(writer);
471         }
472         xmlTextWriterEndElement(writer);
473     }
474
475     if (PQntuples(querySet.resExtraTags))
476     {
477         xmlTextWriterStartElement(writer, BAD_CAST "tags");
478
479         for (i = 0; i < PQntuples(querySet.resExtraTags); i++)
480         {
481             xmlTextWriterStartElement(writer, BAD_CAST "tag");
482             xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(querySet.resExtraTags, i, 0));
483             xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.resExtraTags, i, 1));
484             xmlTextWriterEndElement(writer);
485         }
486
487         xmlTextWriterEndElement(writer);
488     }
489
490
491     xmlTextWriterStartElement(writer, BAD_CAST "osmGeometry");
492     xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.res, 0, 7));
493     xmlTextWriterEndElement(writer);
494
495     xmlTextWriterEndElement(writer); // </feature>
496
497     if (writer_mutex) pthread_mutex_unlock( writer_mutex );
498
499     nominatim_exportFreeQueries(&querySet);
500 }
501
/*
 * Map an address rank to the name of the XML element used for it.
 * Ranks 0..28 have fixed labels; every other value (including 29 and
 * negative ranks) yields "other".
 */
const char * getRankLabel(int rank)
{
    static const char * const labels[] = {
        "continent", "continent",                       /*  0 -  1 */
        "sea", "sea",                                   /*  2 -  3 */
        "country", "country", "country", "country",     /*  4 -  7 */
        "state", "state", "state", "state",             /*  8 - 11 */
        "county", "county", "county", "county",         /* 12 - 15 */
        "city",                                         /* 16 */
        "town",                                         /* 17 */
        "village",                                      /* 18 */
        "unknown",                                      /* 19 */
        "suburb",                                       /* 20 */
        "postcode",                                     /* 21 */
        "neighborhood",                                 /* 22 */
        "postcode",                                     /* 23 */
        "unknown",                                      /* 24 */
        "postcode",                                     /* 25 */
        "street",                                       /* 26 */
        "access",                                       /* 27 */
        "building"                                      /* 28 */
    };
    const int labelCount = (int)(sizeof(labels) / sizeof(labels[0]));

    if (rank < 0 || rank >= labelCount)
    {
        return "other";
    }
    return labels[rank];
}