8 #include <libxml/xmlstring.h>
9 #include <libxml/xmlreader.h>
10 #include <libxml/hash.h>
12 #include "nominatim.h"
// Input document formats the importer recognises. FILETYPE_NONE is the state
// before a root element has been accepted; FILETYPE_STRUCTUREDV0P1 is selected
// by StartElement for an "osmStructured" root whose version attribute is 0.1.
16 typedef enum { FILETYPE_NONE, FILETYPE_STRUCTUREDV0P1 } filetypes_t;
// Import mode for the features being read; StartElement switches it when it
// sees an "add", "update" or "delete" element.
17 typedef enum { FILEMODE_NONE, FILEMODE_ADD, FILEMODE_UPDATE, FILEMODE_DELETE } filemodes_t;
// Capacity limits for the per-feature scratch storage declared at file scope.
// Number of entries in the featureAddress array.
19 #define MAX_FEATUREADDRESS 500
// Number of entries in the featureName array.
20 #define MAX_FEATURENAMES 1000
// Number of entries in the featureExtraTag array.
21 #define MAX_FEATUREEXTRATAGS 100
// Size in bytes of the buffer holding the assembled name list.
22 #define MAX_FEATURENAMESTRING 100000
// Size in bytes of the buffer holding the assembled extra-tag list.
23 #define MAX_FEATUREEXTRATAGSTRING 50000
// NOTE(review): only part of the struct declarations is visible here. The
// members listed below match fields that StartElement assigns on the global
// `feature` (rankAddress, countryCode, houseNumber), so they may belong to a
// different struct than feature_address -- confirm against the full file.
25 struct feature_address
// Assigned from libxml-allocated strings and released with xmlFree in EndElement.
50 xmlChar * rankAddress;
52 xmlChar * countryCode;
54 xmlChar * houseNumber;
// Parser state shared between StartElement and EndElement.
// Document format; stays FILETYPE_NONE until a supported root element is seen.
58 int fileType = FILETYPE_NONE;
// Current import mode; defaults to FILEMODE_ADD.
59 int fileMode = FILEMODE_ADD;
// Scratch storage for the feature currently being parsed. StartElement fills
// these arrays and EndElement consumes them when the "feature" element closes.
61 struct feature_address featureAddress[MAX_FEATUREADDRESS];
62 struct feature_tag featureName[MAX_FEATURENAMES];
63 struct feature_tag featureExtraTag[MAX_FEATUREEXTRATAGS];
64 struct feature feature;
// Number of entries currently used in each of the arrays above.
65 int featureAddressLines = 0;
66 int featureNameLines = 0;
67 int featureExtraTagLines = 0;
// Two-key hash (feature key, feature value) -> name of the prepared statement
// that inserts into the matching partition table; filled by nominatim_import.
69 xmlHashTablePtr partionTableTagsHash;
// Buffers in which EndElement assembles the "type"=>"value" lists passed to
// the placex insert as the name and extratags parameters.
70 char featureNameString[MAX_FEATURENAMESTRING];
71 char featureExtraTagString[MAX_FEATUREEXTRATAGSTRING];
// Handle the opening tag of the XML element `name` read from `reader`.
//
// reader: libxml2 text reader positioned on the element; used to fetch
//         attributes and element text.
// name:   element name.
//
// Behaviour by element:
//  - While fileType is FILETYPE_NONE only an "osmStructured" root with version
//    0.1 is accepted; anything else is reported on stderr.
//  - "add" / "update" / "delete" select the import mode.
//  - "feature" copies the element attributes into the global `feature`, clears
//    its optional fields and resets the address and name counters.
//  - "name" / "tag" append one entry to featureName / featureExtraTag.
//  - "osmGeometry", "adminLevel", "countryCode", "houseNumber" store the
//    element text on the current feature.
//  - The address-part elements (continent ... other) append one entry to
//    featureAddress.
//  - Any other element name is reported as unknown.
73 void StartElement(xmlTextReaderPtr reader, const xmlChar *name)
79 if (fileType == FILETYPE_NONE)
81 // Potential to handle other file types in the future / versions
82 if (xmlStrEqual(name, BAD_CAST "osmStructured"))
// NOTE(review): xmlTextReaderGetAttribute returns NULL when the attribute is
// absent; no NULL check is visible before the strtof call -- confirm.
84 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "version");
85 version = strtof(value, NULL);
// Exact comparison: relies on strtof("0.1") and (float)0.1 rounding to the same float.
88 if (version == (float)0.1)
90 fileType = FILETYPE_STRUCTUREDV0P1;
91 fileMode = FILEMODE_ADD;
95 fprintf( stderr, "Unknown osmStructured version %f\n", version );
101 fprintf( stderr, "Unknown XML document type: %s\n", name );
// Import mode selectors.
107 if (xmlStrEqual(name, BAD_CAST "add"))
109 fileMode = FILEMODE_ADD;
112 if (xmlStrEqual(name, BAD_CAST "update"))
114 fileMode = FILEMODE_UPDATE;
117 if (xmlStrEqual(name, BAD_CAST "delete"))
119 fileMode = FILEMODE_DELETE;
122 if (fileMode == FILEMODE_NONE)
124 fprintf( stderr, "Unknown import mode in: %s\n", name );
// Start of a new feature: the attribute strings are allocated by libxml and
// are released with xmlFree when the feature is finished in EndElement.
128 if (xmlStrEqual(name, BAD_CAST "feature"))
130 feature.type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
131 feature.id = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
132 feature.key = xmlTextReaderGetAttribute(reader, BAD_CAST "key");
133 feature.value = xmlTextReaderGetAttribute(reader, BAD_CAST "value");
// The address rank comes from the "rank" attribute, the search rank from "importance".
134 feature.rankAddress = xmlTextReaderGetAttribute(reader, BAD_CAST "rank");
135 feature.rankSearch = xmlTextReaderGetAttribute(reader, BAD_CAST "importance");
// Optional fields start out unset; child elements fill them in later.
137 feature.countryCode = NULL;
138 feature.adminLevel = NULL;
139 feature.houseNumber = NULL;
140 feature.geometry = NULL;
// NOTE(review): featureExtraTagLines is not visibly reset here, unlike the two
// counters below -- confirm extra tags cannot leak into the next feature.
141 featureAddressLines = 0;
142 featureNameLines = 0;
// "names" is only a container element; nothing to record.
146 if (xmlStrEqual(name, BAD_CAST "names")) return;
147 if (xmlStrEqual(name, BAD_CAST "name"))
149 featureName[featureNameLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
150 featureName[featureNameLines].value = xmlTextReaderReadString(reader);
// NOTE(review): confirm featureNameLines is advanced before this capacity check.
152 if (featureNameLines >= MAX_FEATURENAMES)
154 fprintf( stderr, "Too many name elements\n");
// "tags" is only a container element; nothing to record.
159 if (xmlStrEqual(name, BAD_CAST "tags")) return;
160 if (xmlStrEqual(name, BAD_CAST "tag"))
162 featureExtraTag[featureExtraTagLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
163 featureExtraTag[featureExtraTagLines].value = xmlTextReaderReadString(reader);
164 featureExtraTagLines++;
165 if (featureExtraTagLines >= MAX_FEATUREEXTRATAGS)
167 fprintf( stderr, "Too many extra tag elements\n");
// Simple text-valued children of the current feature.
172 if (xmlStrEqual(name, BAD_CAST "osmGeometry"))
174 feature.geometry = xmlTextReaderReadString(reader);
177 if (xmlStrEqual(name, BAD_CAST "adminLevel"))
179 feature.adminLevel = xmlTextReaderReadString(reader);
182 if (xmlStrEqual(name, BAD_CAST "countryCode"))
184 feature.countryCode = xmlTextReaderReadString(reader);
187 if (xmlStrEqual(name, BAD_CAST "houseNumber"))
189 feature.houseNumber = xmlTextReaderReadString(reader);
// "address" opens the list of address parts, so the list is restarted.
192 if (xmlStrEqual(name, BAD_CAST "address"))
194 featureAddressLines = 0;
// Element names that are recognised as address parts.
198 if (xmlStrEqual(name, BAD_CAST "continent"))
202 else if (xmlStrEqual(name, BAD_CAST "sea"))
206 else if (xmlStrEqual(name, BAD_CAST "country"))
210 else if (xmlStrEqual(name, BAD_CAST "state"))
214 else if (xmlStrEqual(name, BAD_CAST "county"))
218 else if (xmlStrEqual(name, BAD_CAST "city"))
222 else if (xmlStrEqual(name, BAD_CAST "town"))
226 else if (xmlStrEqual(name, BAD_CAST "village"))
230 else if (xmlStrEqual(name, BAD_CAST "unknown"))
234 else if (xmlStrEqual(name, BAD_CAST "suburb"))
238 else if (xmlStrEqual(name, BAD_CAST "postcode"))
242 else if (xmlStrEqual(name, BAD_CAST "neighborhood"))
246 else if (xmlStrEqual(name, BAD_CAST "street"))
250 else if (xmlStrEqual(name, BAD_CAST "access"))
254 else if (xmlStrEqual(name, BAD_CAST "building"))
258 else if (xmlStrEqual(name, BAD_CAST "other"))
// Every address part must carry a "rank" attribute.
264 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "rank");
267 fprintf( stderr, "Address element missing rank\n");
270 featureAddress[featureAddressLines].rankAddress = atoi(value);
// ... and an "isaddress" attribute; the message names the attribute that was
// actually looked up so the two failures can be told apart.
273 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "isaddress");
276 fprintf( stderr, "Address element missing isaddress\n");
// Only the first character is inspected: 't' is stored as "t", anything else as "f".
279 if (*value == 't') strcpy(featureAddress[featureAddressLines].isAddress, "t");
280 else strcpy(featureAddress[featureAddressLines].isAddress, "f");
// Remaining attributes are kept as libxml-allocated strings; EndElement frees them.
283 featureAddress[featureAddressLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
284 featureAddress[featureAddressLines].id = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
285 featureAddress[featureAddressLines].key = xmlTextReaderGetAttribute(reader, BAD_CAST "key");
286 featureAddress[featureAddressLines].value = xmlTextReaderGetAttribute(reader, BAD_CAST "value");
287 featureAddress[featureAddressLines].distance = xmlTextReaderGetAttribute(reader, BAD_CAST "distance");
289 featureAddressLines++;
290 if (featureAddressLines >= MAX_FEATUREADDRESS)
292 fprintf( stderr, "Too many address elements\n");
// Nothing matched: report the unexpected element.
298 fprintf(stderr, "%s: Unknown element name: %s\n", __FUNCTION__, name);
// Handle the closing tag of the XML element `name`.
//
// reader: libxml2 text reader (not used by the statements shown here).
// name:   element name; only "feature" triggers work.
//
// When a "feature" element closes, the data collected by StartElement is
// written to the database through the prepared statements set up in
// nominatim_import:
//  1. obtain a place id: a fresh one in add mode, otherwise the id of the
//     existing placex row matching (type, id, key, value);
//  2. in update/delete mode remove the existing placex, search_name and
//     place_addressline rows for that id;
//  3. in add/update mode insert the placex row, its address lines, the
//     search_name row (when names exist) and, if the feature's key/value has
//     a partition table, a row in that table;
//  4. release the libxml-allocated strings held for the feature.
// Database failures are reported on stderr.
301 void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
304 PGresult * resPlaceID;
// Twelve slots: "placex_insert" binds parameters $1..$12 (indices 0..11).
305 const char * paramValues[12];
307 char * partionQueryName;
308 int i, namePos, lineTypeLen, lineValueLen;
310 if (xmlStrEqual(name, BAD_CAST "feature"))
// Progress report every 1000 features.
313 if (featureCount % 1000 == 0) printf("feature %i(k)\n", featureCount/1000);
// Add mode: draw a new id from the sequence.
315 if (fileMode == FILEMODE_ADD)
317 resPlaceID = PQexecPrepared(conn, "get_new_place_id", 0, NULL, NULL, NULL, 0);
318 if (PQresultStatus(resPlaceID) != PGRES_TUPLES_OK)
320 fprintf(stderr, "get_new_place_id: SELECT failed: %s", PQerrorMessage(conn));
// Update/delete mode: look up the id of the existing row. "get_place_id" is
// the prepared statement that takes these four parameters; "get_new_place_id"
// takes none and would be rejected by the server.
327 paramValues[0] = (const char *)feature.type;
328 paramValues[1] = (const char *)feature.id;
329 paramValues[2] = (const char *)feature.key;
330 paramValues[3] = (const char *)feature.value;
331 resPlaceID = PQexecPrepared(conn, "get_place_id", 4, paramValues, NULL, NULL, 0);
332 if (PQresultStatus(resPlaceID) != PGRES_TUPLES_OK)
334 fprintf(stderr, "get_place_id: SELECT failed: %s", PQerrorMessage(conn));
// place_id points into resPlaceID and stays valid only while that result is alive.
// NOTE(review): if the lookup returns no rows, row 0 does not exist -- confirm
// how an unknown feature should be handled in update/delete mode.
339 place_id = PQgetvalue(resPlaceID, 0, 0);
// Remove everything stored for the old version of the feature.
341 if (fileMode == FILEMODE_UPDATE || fileMode == FILEMODE_DELETE)
343 paramValues[0] = (const char *)place_id;
344 res = PQexecPrepared(conn, "placex_delete", 1, paramValues, NULL, NULL, 0);
345 if (PQresultStatus(res) != PGRES_COMMAND_OK)
347 fprintf(stderr, "placex_delete: DELETE failed: %s", PQerrorMessage(conn));
353 res = PQexecPrepared(conn, "search_name_delete", 1, paramValues, NULL, NULL, 0);
354 if (PQresultStatus(res) != PGRES_COMMAND_OK)
356 fprintf(stderr, "search_name_delete: DELETE failed: %s", PQerrorMessage(conn));
362 res = PQexecPrepared(conn, "place_addressline_delete", 1, paramValues, NULL, NULL, 0);
363 if (PQresultStatus(res) != PGRES_COMMAND_OK)
365 fprintf(stderr, "place_addressline_delete: DELETE failed: %s", PQerrorMessage(conn));
372 if (fileMode == FILEMODE_UPDATE || fileMode == FILEMODE_ADD)
374 // Insert into placex
375 paramValues[0] = (const char *)place_id;
376 paramValues[1] = (const char *)feature.type;
377 paramValues[2] = (const char *)feature.id;
378 paramValues[3] = (const char *)feature.key;
379 paramValues[4] = (const char *)feature.value;
// Build the name list as comma-separated "type"=>"value" pairs.
// NOTE(review): quotes or backslashes inside type/value are copied unescaped -- confirm inputs.
381 featureNameString[0] = 0;
382 if (featureNameLines)
387 for (i = 0; i < featureNameLines; i++)
389 lineTypeLen = strlen(BAD_CAST featureName[i].type);
390 lineValueLen = strlen(BAD_CAST featureName[i].value);
// Each pair adds up to 7 punctuation bytes (comma, four quotes, "=>") and the
// terminating NUL must still fit, hence >= rather than >.
391 if (namePos+lineTypeLen+lineValueLen+7 >= MAX_FEATURENAMESTRING)
393 fprintf(stderr, "feature name too long: %s", (const char *)featureName[i].value);
396 if (namePos) strcpy(featureNameString+(namePos++), ",");
397 strcpy(featureNameString+(namePos++), "\"");
398 strcpy(featureNameString+namePos, BAD_CAST featureName[i].type);
399 namePos += lineTypeLen;
400 strcpy(featureNameString+namePos, "\"=>\"");
402 strcpy(featureNameString+namePos, BAD_CAST featureName[i].value);
403 namePos += lineValueLen;
404 strcpy(featureNameString+(namePos++), "\"");
407 paramValues[5] = (const char *)featureNameString;
// Same construction for the extra tags.
409 featureExtraTagString[0] = 0;
410 if (featureExtraTagLines)
415 for (i = 0; i < featureExtraTagLines; i++)
417 lineTypeLen = strlen(BAD_CAST featureExtraTag[i].type);
418 lineValueLen = strlen(BAD_CAST featureExtraTag[i].value);
// Same bound as for names: leave room for the terminating NUL.
419 if (namePos+lineTypeLen+lineValueLen+7 >= MAX_FEATUREEXTRATAGSTRING)
421 fprintf(stderr, "feature extra tag too long: %s", (const char *)featureExtraTag[i].value);
424 if (namePos) strcpy(featureExtraTagString+(namePos++),",");
425 strcpy(featureExtraTagString+(namePos++), "\"");
426 strcpy(featureExtraTagString+namePos, BAD_CAST featureExtraTag[i].type);
427 namePos += lineTypeLen;
428 strcpy(featureExtraTagString+namePos, "\"=>\"");
430 strcpy(featureExtraTagString+namePos, BAD_CAST featureExtraTag[i].value);
431 namePos += lineValueLen;
432 strcpy(featureExtraTagString+(namePos++), "\"");
435 paramValues[6] = (const char *)featureExtraTagString;
// NULL entries (unset optional fields) are sent to the server as SQL NULL.
437 paramValues[7] = (const char *)feature.adminLevel;
438 paramValues[8] = (const char *)feature.houseNumber;
439 paramValues[9] = (const char *)feature.rankAddress;
440 paramValues[10] = (const char *)feature.rankSearch;
441 paramValues[11] = (const char *)feature.geometry;
442 res = PQexecPrepared(conn, "placex_insert", 12, paramValues, NULL, NULL, 0);
443 if (PQresultStatus(res) != PGRES_COMMAND_OK)
445 fprintf(stderr, "index_placex: INSERT failed: %s", PQerrorMessage(conn));
// One place_addressline row per collected address part.
451 for (i = 0; i < featureAddressLines; i++)
453 // insert into place_address
454 paramValues[0] = (const char *)place_id;
455 paramValues[1] = (const char *)featureAddress[i].distance;
456 paramValues[2] = (const char *)featureAddress[i].type;
457 paramValues[3] = (const char *)featureAddress[i].id;
458 paramValues[4] = (const char *)featureAddress[i].key;
459 paramValues[5] = (const char *)featureAddress[i].value;
460 paramValues[6] = (const char *)featureAddress[i].isAddress;
461 res = PQexecPrepared(conn, "place_addressline_insert", 7, paramValues, NULL, NULL, 0);
462 if (PQresultStatus(res) != PGRES_COMMAND_OK)
464 fprintf(stderr, "place_addressline_insert: INSERT failed: %s", PQerrorMessage(conn));
// The address part has been written; release its libxml strings.
470 xmlFree(featureAddress[i].type);
471 xmlFree(featureAddress[i].id);
472 xmlFree(featureAddress[i].key);
473 xmlFree(featureAddress[i].value);
474 xmlFree(featureAddress[i].distance);
// Index the feature for search only when it has at least one name.
477 if (featureNameLines)
479 paramValues[0] = (const char *)place_id;
480 res = PQexecPrepared(conn, "search_name_insert", 1, paramValues, NULL, NULL, 0);
481 if (PQresultStatus(res) != PGRES_COMMAND_OK)
483 fprintf(stderr, "search_name_insert: INSERT failed: %s", PQerrorMessage(conn));
// A hash hit means this key/value pair has its own partition table.
490 partionQueryName = xmlHashLookup2(partionTableTagsHash, feature.key, feature.value);
491 if (partionQueryName)
493 // insert into partition table
494 paramValues[0] = (const char *)place_id;
495 paramValues[1] = (const char *)feature.geometry;
496 res = PQexecPrepared(conn, partionQueryName, 2, paramValues, NULL, NULL, 0);
497 if (PQresultStatus(res) != PGRES_COMMAND_OK)
499 fprintf(stderr, "%s: INSERT failed: %s", partionQueryName, PQerrorMessage(conn));
// Release of the address-part strings when no rows were inserted for them.
// NOTE(review): this loop frees the same fields as the loop in the insert
// path; confirm the two are on mutually exclusive branches.
510 for (i = 0; i < featureAddressLines; i++)
512 xmlFree(featureAddress[i].type);
513 xmlFree(featureAddress[i].id);
514 xmlFree(featureAddress[i].key);
515 xmlFree(featureAddress[i].value);
516 xmlFree(featureAddress[i].distance);
// Release the strings owned by the feature itself; the optional fields are
// freed only if a child element set them.
520 xmlFree(feature.type);
522 xmlFree(feature.key);
523 xmlFree(feature.value);
524 xmlFree(feature.rankAddress);
525 xmlFree(feature.rankSearch);
526 // if (feature.name) xmlFree(feature.name);
527 if (feature.countryCode) xmlFree(feature.countryCode);
528 if (feature.adminLevel) xmlFree(feature.adminLevel);
529 if (feature.houseNumber) xmlFree(feature.houseNumber);
530 if (feature.geometry) xmlFree(feature.geometry);
// Dispatch the node the reader is currently positioned on:
// element starts go to StartElement, element ends to EndElement, text-like
// nodes are listed without visible handling, and any other node type is
// reported on stderr.
536 static void processNode(xmlTextReaderPtr reader)
539 name = xmlTextReaderName(reader);
// Fallback placeholder name, duplicated so it is heap-allocated like the name above.
542 name = xmlStrdup(BAD_CAST "--");
545 switch (xmlTextReaderNodeType(reader))
547 case XML_READER_TYPE_ELEMENT:
548 StartElement(reader, name);
// The reader emits no END_ELEMENT node for <x/>, so close it here.
549 if (xmlTextReaderIsEmptyElement(reader))
550 EndElement(reader, name); /* No end_element for self closing tags! */
552 case XML_READER_TYPE_END_ELEMENT:
553 EndElement(reader, name);
// Text content is read by StartElement via xmlTextReaderReadString.
555 case XML_READER_TYPE_TEXT:
556 case XML_READER_TYPE_CDATA:
557 case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
561 fprintf(stderr, "Unknown node type %d\n", xmlTextReaderNodeType(reader));
// Import a structured XML file into the database.
//
// conninfo:            libpq connection string passed to PQconnectdb.
// partionTagsFilename: text file with one "key value" pair per line; a
//                      partition-table insert statement is prepared for each.
// filename:            XML document to import.
//
// Steps: connect, build the key/value -> statement-name hash from the
// partition tags file, prepare the fixed statements used by EndElement, then
// read the XML document node by node. Failures are reported on stderr.
568 int nominatim_import(const char *conninfo, const char *partionTagsFilename, const char *filename)
570 xmlTextReaderPtr reader;
573 FILE * partionTagsFile;
574 char * partionQueryName;
575 char partionQuerySQL[1024];
577 conn = PQconnectdb(conninfo);
578 if (PQstatus(conn) != CONNECTION_OK)
580 fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn));
584 partionTableTagsHash = xmlHashCreate(200);
586 partionTagsFile = fopen(partionTagsFilename, "rt");
587 if (!partionTagsFile)
589 fprintf(stderr, "Unable to read partition tags file: %s\n", partionTagsFilename);
593 char buffer[1024], osmkey[256], osmvalue[256];
595 while (fgets(buffer, sizeof(buffer), partionTagsFile) != NULL)
// The field widths (23 and 63) keep both tokens well inside osmkey/osmvalue
// and keep the assembled SQL below inside partionQuerySQL.
597 fields = sscanf( buffer, "%23s %63s", osmkey, osmvalue );
// Lines with no tokens (e.g. blank lines) are skipped.
599 if ( fields <= 0 ) continue;
603 fprintf( stderr, "Error partition file\n");
// Statement name "partition_insert_<key>_<value>"; +2 covers '_' and the NUL.
// NOTE(review): the malloc result is not visibly checked -- confirm.
606 partionQueryName = malloc(strlen("partition_insert_")+strlen(osmkey)+strlen(osmvalue)+2);
607 strcpy(partionQueryName, "partition_insert_");
608 strcat(partionQueryName, osmkey);
609 strcat(partionQueryName, "_");
610 strcat(partionQueryName, osmvalue);
// Target table "place_classtype_<key>_<value>".
// NOTE(review): key and value are spliced into the SQL text unescaped, so the
// tags file has to be trusted.
612 strcpy(partionQuerySQL, "insert into place_classtype_");
613 strcat(partionQuerySQL, osmkey);
614 strcat(partionQuerySQL, "_");
615 strcat(partionQuerySQL, osmvalue);
616 strcat(partionQuerySQL, " (place_id, centroid) values ($1, ST_Centroid(st_setsrid($2, 4326)))");
618 res = PQprepare(conn, partionQueryName, partionQuerySQL, 2, NULL);
619 if (PQresultStatus(res) != PGRES_COMMAND_OK)
621 fprintf(stderr, "Failed to prepare %s: %s\n", partionQueryName, PQerrorMessage(conn));
// The hash keeps the malloc'ed statement name as its payload; EndElement looks it up.
625 xmlHashAddEntry2(partionTableTagsHash, BAD_CAST osmkey, BAD_CAST osmvalue, BAD_CAST partionQueryName);
// Statement without parameters: next value of the place id sequence.
628 res = PQprepare(conn, "get_new_place_id",
629 "select nextval('seq_place')",
631 if (PQresultStatus(res) != PGRES_COMMAND_OK)
633 fprintf(stderr, "Failed to prepare get_new_place_id: %s\n", PQerrorMessage(conn));
// Look up an existing place id by (osm_type, osm_id, class, type).
637 res = PQprepare(conn, "get_place_id",
638 "select place_id from placex where osm_type = $1 and osm_id = $2 and class = $3 and type = $4",
640 if (PQresultStatus(res) != PGRES_COMMAND_OK)
642 fprintf(stderr, "Failed to prepare get_place_id: %s\n", PQerrorMessage(conn));
// Twelve parameters, in the order EndElement fills paramValues[0..11].
646 res = PQprepare(conn, "placex_insert",
647 "insert into placex (place_id,osm_type,osm_id,class,type,name,extratags,admin_level,housenumber,rank_address,rank_search,geometry) "
648 "values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, st_setsrid($12, 4326))",
650 if (PQresultStatus(res) != PGRES_COMMAND_OK)
652 fprintf(stderr, "Failed to prepare placex_insert: %s\n", PQerrorMessage(conn));
// Derive the search_name row from the placex row and its address lines.
// The selected columns follow the order of the insert column list:
// search_rank <- rank_search, address_rank <- rank_address.
656 res = PQprepare(conn, "search_name_insert",
657 "insert into search_name (place_id, search_rank, address_rank, country_code, name_vector, nameaddress_vector, centroid) "
658 "select place_id, rank_search, rank_address, country_code, make_keywords(name), "
659 "(select uniq(sort(array_agg(name_vector))) from place_addressline join search_name on "
660 "(address_place_id = search_name.place_id) where place_addressline.place_id = $1 ), st_centroid(geometry) from placex "
661 "where place_id = $1",
663 if (PQresultStatus(res) != PGRES_COMMAND_OK)
665 fprintf(stderr, "Failed to prepare search_name_insert: %s\n", PQerrorMessage(conn));
// Link a place ($1) to the address place identified by $3..$6; $2 is the
// distance and $7 the isaddress flag.
669 res = PQprepare(conn, "place_addressline_insert",
670 "insert into place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) "
671 "select $1, place_id, false, $7, $2, rank_address from placex where osm_type = $3 and osm_id = $4 and class = $5 and type = $6",
673 if (PQresultStatus(res) != PGRES_COMMAND_OK)
675 fprintf(stderr, "Failed to prepare place_addressline_insert: %s\n", PQerrorMessage(conn));
// Delete statements used in update and delete mode, all keyed by place_id.
679 res = PQprepare(conn, "placex_delete",
680 "delete from placex where place_id = $1",
682 if (PQresultStatus(res) != PGRES_COMMAND_OK)
684 fprintf(stderr, "Failed to prepare placex_delete: %s\n", PQerrorMessage(conn));
688 res = PQprepare(conn, "search_name_delete",
689 "delete from search_name where place_id = $1",
691 if (PQresultStatus(res) != PGRES_COMMAND_OK)
693 fprintf(stderr, "Failed to prepare search_name_delete: %s\n", PQerrorMessage(conn));
697 res = PQprepare(conn, "place_addressline_delete",
698 "delete from place_addressline where place_id = $1",
700 if (PQresultStatus(res) != PGRES_COMMAND_OK)
702 fprintf(stderr, "Failed to prepare place_addressline_delete: %s\n", PQerrorMessage(conn));
// Open the document and read it node by node.
708 reader = inputUTF8(filename);
712 fprintf(stderr, "Unable to open %s\n", filename);
716 ret = xmlTextReaderRead(reader);
720 ret = xmlTextReaderRead(reader);
724 fprintf(stderr, "%s : failed to parse\n", filename);
728 xmlFreeTextReader(reader);
// With a NULL deallocator the hash frees its own nodes but not the payloads.
// NOTE(review): the malloc'ed statement names are therefore not released here.
729 xmlHashFree(partionTableTagsHash, NULL);