git.openstreetmap.org Git - nominatim.git/blob - nominatim/export.c
Merge pull request #1502 from mtmail/specialphrases-quotes
[nominatim.git] / nominatim / export.c
1 /*
2 */
3
4 #include <stdio.h>
5 #include <unistd.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <time.h>
10 #include <stdint.h>
11 #include <pthread.h>
12
13 #include <libpq-fe.h>
14
15 #include "nominatim.h"
16 #include "export.h"
17 #include "postgresql.h"
18
/* Verbosity flag defined in another translation unit; it is not referenced
 * by any of the functions visible in this file. */
19 extern int verbose;
20
/* Section element currently open in the XML output, as maintained by
 * nominatim_exportStartMode(): 0 = none, 1 = <add>, 2 = <update>,
 * 3 = <delete>. Reset to 0 by nominatim_exportXMLStart(). */
21 int mode = 0;
22
23 void nominatim_export(int rank_min, int rank_max, const char *conninfo, const char *structuredoutputfile)
24 {
25     xmlTextWriterPtr writer;
26
27     int rankTotalDone;
28
29     PGconn *conn;
30     PGresult * res;
31     PGresult * resSectors;
32     PGresult * resPlaces;
33
34     int rank;
35     int i;
36     int iSector;
37     int tuples;
38
39     const char *paramValues[2];
40     int         paramLengths[2];
41     int         paramFormats[2];
42     uint32_t    paramRank;
43     uint32_t    paramSector;
44     uint32_t    sector;
45
46     Oid pg_prepare_params[2];
47
48     conn = PQconnectdb(conninfo);
49     if (PQstatus(conn) != CONNECTION_OK)
50     {
51         fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn));
52         exit(EXIT_FAILURE);
53     }
54
55     pg_prepare_params[0] = PG_OID_INT4;
56     res = PQprepare(conn, "index_sectors",
57                     "select geometry_sector,count(*) from placex where rank_search = $1 and indexed_status = 0 group by geometry_sector order by geometry_sector",
58                     1, pg_prepare_params);
59     if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE);
60     PQclear(res);
61
62     pg_prepare_params[0] = PG_OID_INT4;
63     pg_prepare_params[1] = PG_OID_INT4;
64     res = PQprepare(conn, "index_sector_places",
65                     "select place_id from placex where rank_search = $1 and geometry_sector = $2",
66                     2, pg_prepare_params);
67     if (PQresultStatus(res) != PGRES_COMMAND_OK) exit(EXIT_FAILURE);
68     PQclear(res);
69
70     nominatim_exportCreatePreparedQueries(conn);
71
72     // Create the output file
73     writer = nominatim_exportXMLStart(structuredoutputfile);
74
75     for (rank = rank_min; rank <= rank_max; rank++)
76     {
77         printf("Starting rank %d\n", rank);
78
79         paramRank = PGint32(rank);
80         paramValues[0] = (char *)&paramRank;
81         paramLengths[0] = sizeof(paramRank);
82         paramFormats[0] = 1;
83         resSectors = PQexecPrepared(conn, "index_sectors", 1, paramValues, paramLengths, paramFormats, 1);
84         if (PQresultStatus(resSectors) != PGRES_TUPLES_OK)
85         {
86             fprintf(stderr, "index_sectors: SELECT failed: %s", PQerrorMessage(conn));
87             PQclear(resSectors);
88             exit(EXIT_FAILURE);
89         }
90         if (PQftype(resSectors, 0) != PG_OID_INT4)
91         {
92             fprintf(stderr, "Sector value has unexpected type\n");
93             PQclear(resSectors);
94             exit(EXIT_FAILURE);
95         }
96         if (PQftype(resSectors, 1) != PG_OID_INT8)
97         {
98             fprintf(stderr, "Sector value has unexpected type\n");
99             PQclear(resSectors);
100             exit(EXIT_FAILURE);
101         }
102
103         rankTotalDone = 0;
104         for (iSector = 0; iSector < PQntuples(resSectors); iSector++)
105         {
106             sector = PGint32(*((uint32_t *)PQgetvalue(resSectors, iSector, 0)));
107
108             // Get all the place_id's for this sector
109             paramRank = PGint32(rank);
110             paramValues[0] = (char *)&paramRank;
111             paramLengths[0] = sizeof(paramRank);
112             paramFormats[0] = 1;
113             paramSector = PGint32(sector);
114             paramValues[1] = (char *)&paramSector;
115             paramLengths[1] = sizeof(paramSector);
116             paramFormats[1] = 1;
117             resPlaces = PQexecPrepared(conn, "index_sector_places", 2, paramValues, paramLengths, paramFormats, 1);
118             if (PQresultStatus(resPlaces) != PGRES_TUPLES_OK)
119             {
120                 fprintf(stderr, "index_sector_places: SELECT failed: %s", PQerrorMessage(conn));
121                 PQclear(resPlaces);
122                 exit(EXIT_FAILURE);
123             }
124             if (PQftype(resPlaces, 0) != PG_OID_INT8)
125             {
126                 fprintf(stderr, "Place_id value has unexpected type\n");
127                 PQclear(resPlaces);
128                 exit(EXIT_FAILURE);
129             }
130
131             tuples = PQntuples(resPlaces);
132             for (i = 0; i < tuples; i++)
133             {
134                 nominatim_exportPlace(PGint64(*((uint64_t *)PQgetvalue(resPlaces, i, 0))), conn, writer, NULL, NULL);
135                 rankTotalDone++;
136                 if (rankTotalDone%1000 == 0) printf("Done %i (k)\n", rankTotalDone/1000);
137             }
138             PQclear(resPlaces);
139         }
140         PQclear(resSectors);
141     }
142
143     nominatim_exportXMLEnd(writer);
144
145     PQfinish(conn);
146 }
147
148 void nominatim_exportCreatePreparedQueries(PGconn * conn)
149 {
150     Oid pg_prepare_params[2];
151     PGresult * res;
152
153     pg_prepare_params[0] = PG_OID_INT8;
154     res = PQprepare(conn, "placex_details",
155                     "select placex.osm_type, placex.osm_id, placex.class, placex.type, placex.name, placex.housenumber, placex.country_code, ST_AsText(placex.geometry), placex.admin_level, placex.rank_address, placex.rank_search, placex.parent_place_id, parent.osm_type, parent.osm_id, placex.indexed_status, placex.linked_place_id from placex left outer join placex as parent on (placex.parent_place_id = parent.place_id) where placex.place_id = $1",
156                     1, pg_prepare_params);
157     if (PQresultStatus(res) != PGRES_COMMAND_OK)
158     {
159         fprintf(stderr, "Error preparing placex_details: %s", PQerrorMessage(conn));
160         exit(EXIT_FAILURE);
161     }
162     PQclear(res);
163
164     pg_prepare_params[0] = PG_OID_INT8;
165     res = PQprepare(conn, "placex_address",
166                     "select osm_type,osm_id,class,type,distance,cached_rank_address,isaddress from place_addressline join placex on (address_place_id = placex.place_id) where place_addressline.place_id = $1 and address_place_id != place_addressline.place_id order by cached_rank_address asc,osm_type,osm_id",
167                     1, pg_prepare_params);
168     if (PQresultStatus(res) != PGRES_COMMAND_OK)
169     {
170         fprintf(stderr, "Error preparing placex_address: %s", PQerrorMessage(conn));
171         exit(EXIT_FAILURE);
172     }
173     PQclear(res);
174
175     pg_prepare_params[0] = PG_OID_INT8;
176     res = PQprepare(conn, "placex_names",
177                     "select (each(name)).key,(each(name)).value from (select name from placex where place_id = $1) as x order by (each(name)).key",
178                     1, pg_prepare_params);
179     if (PQresultStatus(res) != PGRES_COMMAND_OK)
180     {
181         fprintf(stderr, "Error preparing placex_names: %s", PQerrorMessage(conn));
182         exit(EXIT_FAILURE);
183     }
184     PQclear(res);
185
186     pg_prepare_params[0] = PG_OID_INT8;
187     res = PQprepare(conn, "placex_extratags",
188                     "select (each(extratags)).key,(each(extratags)).value from (select extratags from placex where place_id = $1) as x order by (each(extratags)).key",
189                     1, pg_prepare_params);
190     if (PQresultStatus(res) != PGRES_COMMAND_OK)
191     {
192         fprintf(stderr, "Error preparing placex_extratags: %s", PQerrorMessage(conn));
193         exit(EXIT_FAILURE);
194     }
195     PQclear(res);
196 }
197
198 xmlTextWriterPtr nominatim_exportXMLStart(const char *structuredoutputfile)
199 {
200     xmlTextWriterPtr writer;
201
202     writer = xmlNewTextWriterFilename(structuredoutputfile, 0);
203     if (writer==NULL)
204     {
205         fprintf(stderr, "Unable to open %s\n", structuredoutputfile);
206         exit(EXIT_FAILURE);
207     }
208     xmlTextWriterSetIndent(writer, 1);
209     if (xmlTextWriterStartDocument(writer, NULL, "UTF8", NULL) < 0)
210     {
211         fprintf(stderr, "xmlTextWriterStartDocument failed\n");
212         exit(EXIT_FAILURE);
213     }
214     if (xmlTextWriterStartElement(writer, BAD_CAST "osmStructured") < 0)
215     {
216         fprintf(stderr, "xmlTextWriterStartElement failed\n");
217         exit(EXIT_FAILURE);
218     }
219     if (xmlTextWriterWriteAttribute(writer, BAD_CAST "version", BAD_CAST "0.1") < 0)
220     {
221         fprintf(stderr, "xmlTextWriterWriteAttribute failed\n");
222         exit(EXIT_FAILURE);
223     }
224     if (xmlTextWriterWriteAttribute(writer, BAD_CAST "generator", BAD_CAST "Nominatim") < 0)
225     {
226         fprintf(stderr, "xmlTextWriterWriteAttribute failed\n");
227         exit(EXIT_FAILURE);
228     }
229
230     mode = 0;
231
232     return writer;
233 }
234
235 void nominatim_exportXMLEnd(xmlTextWriterPtr writer)
236 {
237     nominatim_exportEndMode(writer);
238
239     // End <osmStructured>
240     if (xmlTextWriterEndElement(writer) < 0)
241     {
242         fprintf(stderr, "xmlTextWriterEndElement failed\n");
243         exit(EXIT_FAILURE);
244     }
245     if (xmlTextWriterEndDocument(writer) < 0)
246     {
247         fprintf(stderr, "xmlTextWriterEndDocument failed\n");
248         exit(EXIT_FAILURE);
249     }
250     xmlFreeTextWriter(writer);
251 }
252
253 void nominatim_exportStartMode(xmlTextWriterPtr writer, int newMode)
254 {
255     if (mode == newMode) return;
256
257     nominatim_exportEndMode(writer);
258
259     switch(newMode)
260     {
261     case 0:
262         break;
263
264     case 1:
265         if (xmlTextWriterStartElement(writer, BAD_CAST "add") < 0)
266         {
267             fprintf(stderr, "xmlTextWriterStartElement failed\n");
268             exit(EXIT_FAILURE);
269         }
270         break;
271
272     case 2:
273         if (xmlTextWriterStartElement(writer, BAD_CAST "update") < 0)
274         {
275             fprintf(stderr, "xmlTextWriterStartElement failed\n");
276             exit(EXIT_FAILURE);
277         }
278         break;
279
280     case 3:
281         if (xmlTextWriterStartElement(writer, BAD_CAST "delete") < 0)
282         {
283             fprintf(stderr, "xmlTextWriterStartElement failed\n");
284             exit(EXIT_FAILURE);
285         }
286         break;
287     }
288     mode = newMode;
289 }
290
291 void nominatim_exportEndMode(xmlTextWriterPtr writer)
292 {
293     if (!mode) return;
294
295     if (xmlTextWriterEndElement(writer) < 0)
296     {
297         fprintf(stderr, "xmlTextWriterEndElement failed\n");
298         exit(EXIT_FAILURE);
299     }
300 }
301
302 void nominatim_exportPlaceQueries(uint64_t place_id, PGconn * conn, struct export_data * querySet)
303 {
304     const char *        paramValues[1];
305     int                 paramLengths[1];
306     int                 paramFormats[1];
307     uint64_t            paramPlaceID;
308
309     paramPlaceID = PGint64(place_id);
310     paramValues[0] = (char *)&paramPlaceID;
311     paramLengths[0] = sizeof(paramPlaceID);
312     paramFormats[0] = 1;
313
314     querySet->res = PQexecPrepared(conn, "placex_details", 1, paramValues, paramLengths, paramFormats, 0);
315     if (PQresultStatus(querySet->res) != PGRES_TUPLES_OK)
316     {
317         fprintf(stderr, "placex_details: SELECT failed: %s", PQerrorMessage(conn));
318         PQclear(querySet->res);
319         exit(EXIT_FAILURE);
320     }
321
322     querySet->resNames = PQexecPrepared(conn, "placex_names", 1, paramValues, paramLengths, paramFormats, 0);
323     if (PQresultStatus(querySet->resNames) != PGRES_TUPLES_OK)
324     {
325         fprintf(stderr, "placex_names: SELECT failed: %s", PQerrorMessage(conn));
326         PQclear(querySet->resNames);
327         exit(EXIT_FAILURE);
328     }
329
330     querySet->resAddress = PQexecPrepared(conn, "placex_address", 1, paramValues, paramLengths, paramFormats, 0);
331     if (PQresultStatus(querySet->resAddress) != PGRES_TUPLES_OK)
332     {
333         fprintf(stderr, "placex_address: SELECT failed: %s", PQerrorMessage(conn));
334         PQclear(querySet->resAddress);
335         exit(EXIT_FAILURE);
336     }
337
338     querySet->resExtraTags = PQexecPrepared(conn, "placex_extratags", 1, paramValues, paramLengths, paramFormats, 0);
339     if (PQresultStatus(querySet->resExtraTags) != PGRES_TUPLES_OK)
340     {
341         fprintf(stderr, "placex_extratags: SELECT failed: %s", PQerrorMessage(conn));
342         PQclear(querySet->resExtraTags);
343         exit(EXIT_FAILURE);
344     }
345 }
346
347 void nominatim_exportFreeQueries(struct export_data * querySet)
348 {
349     PQclear(querySet->res);
350     PQclear(querySet->resNames);
351     PQclear(querySet->resAddress);
352     PQclear(querySet->resExtraTags);
353 }
354
355 /*
356  * Requirements: the prepared queries must exist
357  */
358 void nominatim_exportPlace(uint64_t place_id, PGconn * conn,
359   xmlTextWriterPtr writer, pthread_mutex_t * writer_mutex, struct export_data * prevQuerySet)
360 {
361     struct export_data          querySet;
362
363     int                         i;
364
365     nominatim_exportPlaceQueries(place_id, conn, &querySet);
366
367     // Add, modify or delete?
368     if (prevQuerySet)
369     {
370         if ((PQgetvalue(prevQuerySet->res, 0, 14) && strcmp(PQgetvalue(prevQuerySet->res, 0, 14), "100") == 0) || PQntuples(querySet.res) == 0)
371         {
372             // Delete
373             if (writer_mutex) pthread_mutex_lock( writer_mutex );
374             nominatim_exportStartMode(writer, 3);
375             xmlTextWriterStartElement(writer, BAD_CAST "feature");
376             xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "place_id", "%li", place_id);
377             xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(prevQuerySet->res, 0, 0));
378             xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST PQgetvalue(prevQuerySet->res, 0, 1));
379             xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST PQgetvalue(prevQuerySet->res, 0, 2));
380             xmlTextWriterWriteAttribute(writer, BAD_CAST "value", BAD_CAST PQgetvalue(prevQuerySet->res, 0, 3));
381             xmlTextWriterEndElement(writer);
382             if (writer_mutex) pthread_mutex_unlock( writer_mutex );
383             nominatim_exportFreeQueries(&querySet);
384             return;
385         }
386         if (PQgetvalue(prevQuerySet->res, 0, 14) && strcmp(PQgetvalue(prevQuerySet->res, 0, 14), "1") == 0)
387         {
388             // Add
389             if (writer_mutex) pthread_mutex_lock( writer_mutex );
390             nominatim_exportStartMode(writer, 1);
391         }
392         else
393         {
394             // Update, but only if something has changed
395
396             // TODO: detect changes
397
398             if (writer_mutex) pthread_mutex_lock( writer_mutex );
399             nominatim_exportStartMode(writer, 2);
400         }
401     }
402     else
403     {
404        // Add
405        if (writer_mutex) pthread_mutex_lock( writer_mutex );
406        nominatim_exportStartMode(writer, 1);
407     }
408
409     xmlTextWriterStartElement(writer, BAD_CAST "feature");
410     xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "place_id", "%li", place_id);
411     xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(querySet.res, 0, 0));
412     xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST PQgetvalue(querySet.res, 0, 1));
413     xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST PQgetvalue(querySet.res, 0, 2));
414     xmlTextWriterWriteAttribute(writer, BAD_CAST "value", BAD_CAST PQgetvalue(querySet.res, 0, 3));
415     xmlTextWriterWriteAttribute(writer, BAD_CAST "rank", BAD_CAST PQgetvalue(querySet.res, 0, 9));
416     xmlTextWriterWriteAttribute(writer, BAD_CAST "importance", BAD_CAST PQgetvalue(querySet.res, 0, 10));
417     xmlTextWriterWriteAttribute(writer, BAD_CAST "parent_place_id", BAD_CAST PQgetvalue(querySet.res, 0, 11));
418     xmlTextWriterWriteAttribute(writer, BAD_CAST "parent_type", BAD_CAST PQgetvalue(querySet.res, 0, 12));
419     xmlTextWriterWriteAttribute(writer, BAD_CAST "parent_id", BAD_CAST PQgetvalue(querySet.res, 0, 13));
420     xmlTextWriterWriteAttribute(writer, BAD_CAST "linked_place_id", BAD_CAST PQgetvalue(querySet.res, 0, 15));
421
422     if (PQntuples(querySet.resNames))
423     {
424         xmlTextWriterStartElement(writer, BAD_CAST "names");
425
426         for (i = 0; i < PQntuples(querySet.resNames); i++)
427         {
428             xmlTextWriterStartElement(writer, BAD_CAST "name");
429             xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(querySet.resNames, i, 0));
430             xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.resNames, i, 1));
431             xmlTextWriterEndElement(writer);
432         }
433
434         xmlTextWriterEndElement(writer);
435     }
436
437     if (PQgetvalue(querySet.res, 0, 5) && strlen(PQgetvalue(querySet.res, 0, 5)))
438     {
439         xmlTextWriterStartElement(writer, BAD_CAST "houseNumber");
440         xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.res, 0, 5));
441         xmlTextWriterEndElement(writer);
442     }
443
444     if (PQgetvalue(querySet.res, 0, 8) && strlen(PQgetvalue(querySet.res, 0, 8)))
445     {
446         xmlTextWriterStartElement(writer, BAD_CAST "adminLevel");
447         xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.res, 0, 8));
448         xmlTextWriterEndElement(writer);
449     }
450
451     if (PQgetvalue(querySet.res, 0, 6) && strlen(PQgetvalue(querySet.res, 0, 6)))
452     {
453         xmlTextWriterStartElement(writer, BAD_CAST "countryCode");
454         xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.res, 0, 6));
455         xmlTextWriterEndElement(writer);
456     }
457
458     if (PQntuples(querySet.resAddress) > 0)
459     {
460         xmlTextWriterStartElement(writer, BAD_CAST "address");
461         for (i = 0; i < PQntuples(querySet.resAddress); i++)
462         {
463             xmlTextWriterStartElement(writer, BAD_CAST getRankLabel(atoi(PQgetvalue(querySet.resAddress, i, 5))));
464             xmlTextWriterWriteAttribute(writer, BAD_CAST "rank", BAD_CAST PQgetvalue(querySet.resAddress, i, 5));
465             xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(querySet.resAddress, i, 0));
466             xmlTextWriterWriteAttribute(writer, BAD_CAST "id", BAD_CAST PQgetvalue(querySet.resAddress, i, 1));
467             xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST PQgetvalue(querySet.resAddress, i, 2));
468             xmlTextWriterWriteAttribute(writer, BAD_CAST "value", BAD_CAST PQgetvalue(querySet.resAddress, i, 3));
469             xmlTextWriterWriteAttribute(writer, BAD_CAST "distance", BAD_CAST PQgetvalue(querySet.resAddress, i, 4));
470             xmlTextWriterWriteAttribute(writer, BAD_CAST "isaddress", BAD_CAST PQgetvalue(querySet.resAddress, i, 6));
471             xmlTextWriterEndElement(writer);
472         }
473         xmlTextWriterEndElement(writer);
474     }
475
476     if (PQntuples(querySet.resExtraTags))
477     {
478         xmlTextWriterStartElement(writer, BAD_CAST "tags");
479
480         for (i = 0; i < PQntuples(querySet.resExtraTags); i++)
481         {
482             xmlTextWriterStartElement(writer, BAD_CAST "tag");
483             xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST PQgetvalue(querySet.resExtraTags, i, 0));
484             xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.resExtraTags, i, 1));
485             xmlTextWriterEndElement(writer);
486         }
487
488         xmlTextWriterEndElement(writer);
489     }
490
491
492     xmlTextWriterStartElement(writer, BAD_CAST "osmGeometry");
493     xmlTextWriterWriteString(writer, BAD_CAST PQgetvalue(querySet.res, 0, 7));
494     xmlTextWriterEndElement(writer);
495
496     xmlTextWriterEndElement(writer); // </feature>
497
498     if (writer_mutex) pthread_mutex_unlock( writer_mutex );
499
500     nominatim_exportFreeQueries(&querySet);
501 }
502
/*
 * Map an address rank to the XML element name used for it.
 *
 * Ranks 0 to 28 map to fixed labels; several neighbouring ranks share one.
 * Every other value, including negative ranks and 29 upwards, yields
 * "other". The returned string is a literal and must not be freed.
 */
const char * getRankLabel(int rank)
{
    // Each entry covers the ranks above the previous entry's lastRank up to
    // and including its own lastRank.
    static const struct
    {
        int         lastRank;
        const char *label;
    } bands[] = {
        {  1, "continent" },
        {  3, "sea" },
        {  7, "country" },
        { 11, "state" },
        { 15, "county" },
        { 16, "city" },
        { 17, "town" },
        { 18, "village" },
        { 19, "unknown" },
        { 20, "suburb" },
        { 21, "postcode" },
        { 22, "neighborhood" },
        { 23, "postcode" },
        { 24, "unknown" },
        { 25, "postcode" },
        { 26, "street" },
        { 27, "access" },
        { 28, "building" }
    };
    size_t band;

    if (rank < 0) return "other";

    for (band = 0; band < sizeof(bands) / sizeof(bands[0]); band++)
    {
        if (rank <= bands[band].lastRank) return bands[band].label;
    }

    return "other";
}
558 }