8 #include <libxml/xmlstring.h>
9 #include <libxml/xmlreader.h>
10 #include <libxml/hash.h>
12 #include "nominatim.h"
/* Input document formats. StartElement() selects FILETYPE_STRUCTUREDV0P1 when
 * the root element is "osmStructured" with version 0.1. */
16 typedef enum { FILETYPE_NONE, FILETYPE_STRUCTUREDV0P1 } filetypes_t;
/* Import modes, switched by the "add", "update" and "delete" elements. */
17 typedef enum { FILEMODE_NONE, FILEMODE_ADD, FILEMODE_UPDATE, FILEMODE_DELETE } filemodes_t;
/* Capacities of the per-feature featureAddress, featureName and
 * featureExtraTag arrays; StartElement() reports an error instead of storing
 * entries beyond these limits. */
19 #define MAX_FEATUREADDRESS 5000
20 #define MAX_FEATURENAMES 10000
21 #define MAX_FEATUREEXTRATAGS 10000
/* Sizes in bytes of featureNameString and featureExtraTagString, the buffers
 * in which EndElement() assembles the name and extra-tag lists. */
22 #define MAX_FEATURENAMESTRING 1000000
23 #define MAX_FEATUREEXTRATAGSTRING 500000
25 struct feature_address
50 xmlChar * rankAddress;
52 xmlChar * countryCode;
53 xmlChar * parentPlaceID;
57 xmlChar * houseNumber;
/* Parser state shared by StartElement() and EndElement(). */
61 int fileType = FILETYPE_NONE;
62 int fileMode = FILEMODE_ADD;
/* Data collected for the feature currently being parsed. StartElement() fills
 * these arrays and `feature`; EndElement() consumes them when the "feature"
 * element closes. */
64 struct feature_address featureAddress[MAX_FEATUREADDRESS];
65 struct feature_tag featureName[MAX_FEATURENAMES];
66 struct feature_tag featureExtraTag[MAX_FEATUREEXTRATAGS];
67 struct feature feature;
/* Number of used entries in the three arrays above; reset to 0 at every
 * "feature" start tag. */
68 int featureAddressLines = 0;
69 int featureNameLines = 0;
70 int featureExtraTagLines = 0;
/* (key, value) -> name of the prepared statement that inserts into / deletes
 * from the matching place_classtype_<key>_<value> table. Both hashes are
 * filled by nominatim_import() and looked up in EndElement(). */
72 xmlHashTablePtr partionTableTagsHash;
73 xmlHashTablePtr partionTableTagsHashDelete;
/* Scratch buffers for the "type"=>"value" lists that EndElement() passes as
 * the name and extratags parameters of placex_insert. */
74 char featureNameString[MAX_FEATURENAMESTRING];
75 char featureExtraTagString[MAX_FEATUREEXTRATAGSTRING];
/*
 * StartElement - react to the opening tag of one XML element.
 *
 *   reader  libxml2 text reader positioned on the element; attributes and
 *           element text are read from it.
 *   name    name of the element being opened.
 *
 * Visible behaviour, by element name:
 *   - while fileType is FILETYPE_NONE: "osmStructured" with version 0.1
 *     selects FILETYPE_STRUCTUREDV0P1 and FILEMODE_ADD; other versions and
 *     other document types are reported on stderr;
 *   - "add" / "update" / "delete" set fileMode;
 *   - "feature" loads the element's attributes into the global `feature`,
 *     clears its optional fields and resets the three per-feature counters;
 *   - "name" and "tag" store a type attribute plus the element text in
 *     featureName / featureExtraTag, up to MAX_FEATURENAMES /
 *     MAX_FEATUREEXTRATAGS entries;
 *   - "osmGeometry", "adminLevel", "countryCode" and "houseNumber" store the
 *     element text in `feature`;
 *   - "address" resets featureAddressLines;
 *   - "continent" ... "other" are the recognised address-part elements; an
 *     address line (rank, isaddress, type, id, key, value and distance
 *     attributes) is stored in featureAddress, up to MAX_FEATUREADDRESS;
 *   - the last statement reports an unknown element name on stderr.
 *
 * The strings obtained here are kept in globals and consumed by EndElement().
 *
 * NOTE(review): short lines (braces, else, return, local declarations) are
 * not present in this listing, so the exact nesting of the branches above
 * should be confirmed against the complete file.
 */
77 void StartElement(xmlTextReaderPtr reader, const xmlChar *name)
// First element of the document: decide which file format is being read.
83 if (fileType == FILETYPE_NONE)
85 // Potential to handle other file types in the future / versions
86 if (xmlStrEqual(name, BAD_CAST "osmStructured"))
// NOTE(review): xmlTextReaderGetAttribute() returns NULL when the attribute
// is absent; no NULL check is visible before strtof() - confirm.
88 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "version");
89 version = strtof(value, NULL);
// Exact float comparison: relies on strtof("0.1") yielding the same value as
// (float)0.1.
92 if (version == (float)0.1)
94 fileType = FILETYPE_STRUCTUREDV0P1;
95 fileMode = FILEMODE_ADD;
99 fprintf( stderr, "Unknown osmStructured version %f (%s)\n", version, value );
105 fprintf( stderr, "Unknown XML document type: %s\n", name );
// Import mode selectors.
111 if (xmlStrEqual(name, BAD_CAST "add"))
113 fileMode = FILEMODE_ADD;
116 if (xmlStrEqual(name, BAD_CAST "update"))
118 fileMode = FILEMODE_UPDATE;
121 if (xmlStrEqual(name, BAD_CAST "delete"))
123 fileMode = FILEMODE_DELETE;
126 if (fileMode == FILEMODE_NONE)
128 fprintf( stderr, "Unknown import mode in: %s\n", name );
// Start of a feature: capture its identifying attributes. Each attribute
// string is allocated by libxml2 and may be NULL when the attribute is absent.
132 if (xmlStrEqual(name, BAD_CAST "feature"))
134 feature.placeID = xmlTextReaderGetAttribute(reader, BAD_CAST "place_id");
135 feature.type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
136 feature.id = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
137 feature.key = xmlTextReaderGetAttribute(reader, BAD_CAST "key");
138 feature.value = xmlTextReaderGetAttribute(reader, BAD_CAST "value");
// The "rank" attribute is stored as rankAddress and "importance" as rankSearch.
139 feature.rankAddress = xmlTextReaderGetAttribute(reader, BAD_CAST "rank");
140 feature.rankSearch = xmlTextReaderGetAttribute(reader, BAD_CAST "importance");
142 feature.parentPlaceID = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_place_id");
143 feature.parentType = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_type");
144 feature.parentID = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_id");
// Optional fields are filled by child elements; NULL means "not seen".
146 feature.countryCode = NULL;
147 feature.adminLevel = NULL;
148 feature.houseNumber = NULL;
149 feature.geometry = NULL;
150 featureAddressLines = 0;
151 featureNameLines = 0;
152 featureExtraTagLines = 0;
// "names" is only a container; its "name" children carry the data.
156 if (xmlStrEqual(name, BAD_CAST "names")) return;
157 if (xmlStrEqual(name, BAD_CAST "name"))
159 if (featureNameLines < MAX_FEATURENAMES)
161 featureName[featureNameLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
162 featureName[featureNameLines].value = xmlTextReaderReadString(reader);
// NOTE(review): no featureNameLines++ is visible here, unlike
// featureExtraTagLines++ in the "tag" branch below - confirm it exists.
167 fprintf( stderr, "Too many name elements (%s%s)\n", feature.type, feature.id);
// "tags" is only a container; its "tag" children carry the data.
172 if (xmlStrEqual(name, BAD_CAST "tags")) return;
173 if (xmlStrEqual(name, BAD_CAST "tag"))
175 if (featureExtraTagLines < MAX_FEATUREEXTRATAGS)
177 featureExtraTag[featureExtraTagLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
178 featureExtraTag[featureExtraTagLines].value = xmlTextReaderReadString(reader);
179 featureExtraTagLines++;
183 fprintf( stderr, "Too many extra tag elements (%s%s)\n", feature.type, feature.id);
// Simple child elements whose text content is stored on the feature.
188 if (xmlStrEqual(name, BAD_CAST "osmGeometry"))
190 feature.geometry = xmlTextReaderReadString(reader);
193 if (xmlStrEqual(name, BAD_CAST "adminLevel"))
195 feature.adminLevel = xmlTextReaderReadString(reader);
198 if (xmlStrEqual(name, BAD_CAST "countryCode"))
200 feature.countryCode = xmlTextReaderReadString(reader);
203 if (xmlStrEqual(name, BAD_CAST "houseNumber"))
205 feature.houseNumber = xmlTextReaderReadString(reader);
208 if (xmlStrEqual(name, BAD_CAST "address"))
210 featureAddressLines = 0;
// Recognised address-part element names. The bodies of these branches are
// not visible in this listing; presumably they mark the element as an
// address line for the code below - confirm.
214 if (xmlStrEqual(name, BAD_CAST "continent"))
218 else if (xmlStrEqual(name, BAD_CAST "sea"))
222 else if (xmlStrEqual(name, BAD_CAST "country"))
226 else if (xmlStrEqual(name, BAD_CAST "state"))
230 else if (xmlStrEqual(name, BAD_CAST "county"))
234 else if (xmlStrEqual(name, BAD_CAST "city"))
238 else if (xmlStrEqual(name, BAD_CAST "town"))
242 else if (xmlStrEqual(name, BAD_CAST "village"))
246 else if (xmlStrEqual(name, BAD_CAST "unknown"))
250 else if (xmlStrEqual(name, BAD_CAST "suburb"))
254 else if (xmlStrEqual(name, BAD_CAST "postcode"))
258 else if (xmlStrEqual(name, BAD_CAST "neighborhood"))
262 else if (xmlStrEqual(name, BAD_CAST "street"))
266 else if (xmlStrEqual(name, BAD_CAST "access"))
270 else if (xmlStrEqual(name, BAD_CAST "building"))
274 else if (xmlStrEqual(name, BAD_CAST "other"))
// Store one address line for the current feature.
280 if (featureAddressLines < MAX_FEATUREADDRESS)
// NOTE(review): the strings returned for "rank" and "isaddress" are
// allocated by libxml2; no xmlFree(value) is visible - confirm they are freed.
282 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "rank");
285 fprintf( stderr, "Address element missing rank\n");
288 featureAddress[featureAddressLines].rankAddress = atoi(value);
291 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "isaddress");
// This branch reports the missing "isaddress" attribute, not the rank.
294 fprintf( stderr, "Address element missing isaddress\n");
// isAddress is normalised to the one-character strings "t" or "f".
297 if (*value == 't') strcpy(featureAddress[featureAddressLines].isAddress, "t");
298 else strcpy(featureAddress[featureAddressLines].isAddress, "f");
301 featureAddress[featureAddressLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
302 featureAddress[featureAddressLines].id = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
303 featureAddress[featureAddressLines].key = xmlTextReaderGetAttribute(reader, BAD_CAST "key");
304 featureAddress[featureAddressLines].value = xmlTextReaderGetAttribute(reader, BAD_CAST "value");
305 featureAddress[featureAddressLines].distance = xmlTextReaderGetAttribute(reader, BAD_CAST "distance");
307 featureAddressLines++;
311 fprintf( stderr, "Too many address elements (%s%s)\n", feature.type, feature.id);
317 fprintf(stderr, "%s: Unknown element name: %s\n", __FUNCTION__, name);
320 void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
323 const char * paramValues[11];
325 char * partionQueryName;
326 int i, namePos, lineTypeLen, lineValueLen;
328 if (xmlStrEqual(name, BAD_CAST "feature"))
331 if (featureCount % 1000 == 0) printf("feature %i(k)\n", featureCount/1000);
333 if (fileMode == FILEMODE_ADD)
335 resPlaceID = PQexecPrepared(conn, "get_new_place_id", 0, NULL, NULL, NULL, 0);
336 if (PQresultStatus(resPlaceID) != PGRES_TUPLES_OK)
338 fprintf(stderr, "get_place_id: INSERT failed: %s", PQerrorMessage(conn));
345 paramValues[0] = (const char *)feature.type;
346 paramValues[1] = (const char *)feature.id;
347 paramValues[2] = (const char *)feature.key;
348 paramValues[3] = (const char *)feature.value;
349 resPlaceID = PQexecPrepared(conn, "get_new_place_id", 4, paramValues, NULL, NULL, 0);
350 if (PQresultStatus(resPlaceID) != PGRES_TUPLES_OK)
352 fprintf(stderr, "index_placex: INSERT failed: %s", PQerrorMessage(conn));
358 place_id = (char *)feature.placeID;
360 if (fileMode == FILEMODE_UPDATE || fileMode == FILEMODE_DELETE || fileMode == FILEMODE_ADD)
362 paramValues[0] = (const char *)place_id;
363 res = PQexecPrepared(conn, "placex_delete", 1, paramValues, NULL, NULL, 0);
364 if (PQresultStatus(res) != PGRES_COMMAND_OK)
366 fprintf(stderr, "placex_delete: DELETE failed: %s", PQerrorMessage(conn));
372 res = PQexecPrepared(conn, "search_name_delete", 1, paramValues, NULL, NULL, 0);
373 if (PQresultStatus(res) != PGRES_COMMAND_OK)
375 fprintf(stderr, "search_name_delete: DELETE failed: %s", PQerrorMessage(conn));
381 res = PQexecPrepared(conn, "place_addressline_delete", 1, paramValues, NULL, NULL, 0);
382 if (PQresultStatus(res) != PGRES_COMMAND_OK)
384 fprintf(stderr, "place_addressline_delete: DELETE failed: %s", PQerrorMessage(conn));
390 partionQueryName = xmlHashLookup2(partionTableTagsHashDelete, feature.key, feature.value);
391 if (partionQueryName)
393 res = PQexecPrepared(conn, partionQueryName, 1, paramValues, NULL, NULL, 0);
394 if (PQresultStatus(res) != PGRES_COMMAND_OK)
396 fprintf(stderr, "%s: DELETE failed: %s", partionQueryName, PQerrorMessage(conn));
404 if (fileMode == FILEMODE_UPDATE || fileMode == FILEMODE_ADD)
406 // Insert into placex
407 paramValues[0] = (const char *)place_id;
408 paramValues[1] = (const char *)feature.type;
409 paramValues[2] = (const char *)feature.id;
410 paramValues[3] = (const char *)feature.key;
411 paramValues[4] = (const char *)feature.value;
413 featureNameString[0] = 0;
414 if (featureNameLines)
419 for (i = 0; i < featureNameLines; i++)
421 lineTypeLen = (int)strlen((char *) featureName[i].type);
422 lineValueLen = (int)strlen((char *) featureName[i].value);
423 if (namePos+lineTypeLen+lineValueLen+7 > MAX_FEATURENAMESTRING)
425 fprintf(stderr, "feature name too long: %s", (const char *)featureName[i].value);
428 if (namePos) strcpy(featureNameString+(namePos++), ",");
429 strcpy(featureNameString+(namePos++), "\"");
430 strcpy(featureNameString+namePos, (char*) featureName[i].type);
431 namePos += lineTypeLen;
432 strcpy(featureNameString+namePos, "\"=>\"");
434 strcpy(featureNameString+namePos, (char *) featureName[i].value);
435 namePos += lineValueLen;
436 strcpy(featureNameString+(namePos++), "\"");
439 paramValues[5] = (const char *)featureNameString;
441 featureExtraTagString[0] = 0;
442 if (featureExtraTagLines)
447 for (i = 0; i < featureExtraTagLines; i++)
449 lineTypeLen = strlen((char *) featureExtraTag[i].type);
450 lineValueLen = strlen((char *) featureExtraTag[i].value);
451 if (namePos+lineTypeLen+lineValueLen+7 > MAX_FEATUREEXTRATAGSTRING)
453 fprintf(stderr, "feature extra tag too long: %s", (const char *)featureExtraTag[i].value);
456 if (namePos) strcpy(featureExtraTagString+(namePos++),",");
457 strcpy(featureExtraTagString+(namePos++), "\"");
458 strcpy(featureExtraTagString+namePos, (char *) featureExtraTag[i].type);
459 namePos += lineTypeLen;
460 strcpy(featureExtraTagString+namePos, "\"=>\"");
462 strcpy(featureExtraTagString+namePos, (char *) featureExtraTag[i].value);
463 namePos += lineValueLen;
464 strcpy(featureExtraTagString+(namePos++), "\"");
467 paramValues[6] = (const char *)featureExtraTagString;
469 paramValues[7] = (const char *)feature.parentPlaceID;
471 paramValues[8] = (const char *)feature.adminLevel;
472 paramValues[9] = (const char *)feature.houseNumber;
473 paramValues[10] = (const char *)feature.rankAddress;
474 paramValues[11] = (const char *)feature.rankSearch;
475 paramValues[12] = (const char *)feature.geometry;
476 if (strlen(paramValues[3]))
478 res = PQexecPrepared(conn, "placex_insert", 13, paramValues, NULL, NULL, 0);
479 if (PQresultStatus(res) != PGRES_COMMAND_OK)
481 fprintf(stderr, "index_placex: INSERT failed: %s", PQerrorMessage(conn));
482 fprintf(stderr, "index_placex: INSERT failed: %s %s %s", paramValues[0], paramValues[1], paramValues[2]);
489 for (i = 0; i < featureAddressLines; i++)
491 // insert into place_address
492 paramValues[0] = (const char *)place_id;
493 paramValues[1] = (const char *)featureAddress[i].distance;
494 paramValues[2] = (const char *)featureAddress[i].type;
495 paramValues[3] = (const char *)featureAddress[i].id;
496 paramValues[4] = (const char *)featureAddress[i].key;
497 paramValues[5] = (const char *)featureAddress[i].value;
498 paramValues[6] = (const char *)featureAddress[i].isAddress;
499 res = PQexecPrepared(conn, "place_addressline_insert", 7, paramValues, NULL, NULL, 0);
500 if (PQresultStatus(res) != PGRES_COMMAND_OK)
502 fprintf(stderr, "place_addressline_insert: INSERT failed: %s", PQerrorMessage(conn));
508 xmlFree(featureAddress[i].type);
509 xmlFree(featureAddress[i].id);
510 xmlFree(featureAddress[i].key);
511 xmlFree(featureAddress[i].value);
512 xmlFree(featureAddress[i].distance);
515 if (featureNameLines)
517 paramValues[0] = (const char *)place_id;
518 res = PQexecPrepared(conn, "search_name_insert", 1, paramValues, NULL, NULL, 0);
519 if (PQresultStatus(res) != PGRES_COMMAND_OK)
521 fprintf(stderr, "search_name_insert: INSERT failed: %s", PQerrorMessage(conn));
528 partionQueryName = xmlHashLookup2(partionTableTagsHash, feature.key, feature.value);
529 if (partionQueryName)
531 // insert into partition table
532 paramValues[0] = (const char *)place_id;
533 paramValues[1] = (const char *)feature.geometry;
534 res = PQexecPrepared(conn, partionQueryName, 2, paramValues, NULL, NULL, 0);
535 if (PQresultStatus(res) != PGRES_COMMAND_OK)
537 fprintf(stderr, "%s: INSERT failed: %s", partionQueryName, PQerrorMessage(conn));
547 for (i = 0; i < featureAddressLines; i++)
549 xmlFree(featureAddress[i].type);
550 xmlFree(featureAddress[i].id);
551 xmlFree(featureAddress[i].key);
552 xmlFree(featureAddress[i].value);
553 xmlFree(featureAddress[i].distance);
557 xmlFree(feature.placeID);
558 xmlFree(feature.type);
560 xmlFree(feature.key);
561 xmlFree(feature.value);
562 xmlFree(feature.rankAddress);
563 xmlFree(feature.rankSearch);
564 // if (feature.name) xmlFree(feature.name);
565 if (feature.countryCode) xmlFree(feature.countryCode);
566 if (feature.adminLevel) xmlFree(feature.adminLevel);
567 if (feature.houseNumber) xmlFree(feature.houseNumber);
568 if (feature.geometry) xmlFree(feature.geometry);
570 // PQclear(resPlaceID);
/*
 * processNode - dispatch the reader's current node to the element handlers.
 *
 *   reader  libxml2 text reader positioned on the node to process.
 *
 * Element nodes go to StartElement(), end-element nodes to EndElement().
 * Text, CDATA and significant-whitespace nodes have their own case labels;
 * any other node type is reported on stderr.
 *
 * NOTE(review): the break statements, the default label and the release of
 * `name` are not visible in this listing - confirm against the complete file.
 */
574 static void processNode(xmlTextReaderPtr reader)
// Name of the current node; "--" is used as a stand-in (presumably when the
// reader returns no name - the condition is not visible here).
577 name = xmlTextReaderName(reader);
580 name = xmlStrdup(BAD_CAST "--");
583 switch (xmlTextReaderNodeType(reader))
585 case XML_READER_TYPE_ELEMENT:
586 StartElement(reader, name);
// An empty element (<x/>) produces no end-element node, so it is closed
// immediately after being opened.
587 if (xmlTextReaderIsEmptyElement(reader))
588 EndElement(reader, name); /* No end_element for self closing tags! */
590 case XML_READER_TYPE_END_ELEMENT:
591 EndElement(reader, name);
// Text content is fetched by StartElement() through
// xmlTextReaderReadString(); these node types share one case body.
593 case XML_READER_TYPE_TEXT:
594 case XML_READER_TYPE_CDATA:
595 case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
599 fprintf(stderr, "Unknown node type %d\n", xmlTextReaderNodeType(reader));
/*
 * nominatim_import - load a structured XML file into the database.
 *
 *   conninfo             libpq connection string passed to PQconnectdb().
 *   partionTagsFilename  text file with one "key value" pair per line; for
 *                        every pair an insert and a delete statement on
 *                        place_classtype_<key>_<value> are prepared and
 *                        registered in the two partition hashes.
 *   filename             XML input, opened through inputUTF8().
 *
 * Steps visible here: connect; prepare the per-partition statements; prepare
 * the fixed statements used by EndElement(); read the XML with the text
 * reader; free the reader and the two hashes.
 *
 * NOTE(review): the return statements, the reaction to each reported failure
 * and the read loop body are not visible in this listing; presumably
 * processNode() is called for every node read - confirm.
 */
606 int nominatim_import(const char *conninfo, const char *partionTagsFilename, const char *filename)
608 xmlTextReaderPtr reader;
611 FILE * partionTagsFile;
612 char * partionQueryName;
613 char partionQuerySQL[1024];
615 conn = PQconnectdb(conninfo);
616 if (PQstatus(conn) != CONNECTION_OK)
618 fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn));
// (key, value) -> prepared statement name, consulted by EndElement().
622 partionTableTagsHash = xmlHashCreate(200);
623 partionTableTagsHashDelete = xmlHashCreate(200);
// NOTE(review): no fclose(partionTagsFile) is visible anywhere in this
// function - confirm the file is closed after the loop.
625 partionTagsFile = fopen(partionTagsFilename, "rt");
626 if (!partionTagsFile)
628 fprintf(stderr, "Unable to read partition tags file: %s\n", partionTagsFilename);
632 char buffer[1024], osmkey[256], osmvalue[256];
634 while (fgets(buffer, sizeof(buffer), partionTagsFile) != NULL)
// The scan widths cap the key at 23 and the value at 63 characters, which
// also bounds the statement names and SQL text built below.
636 fields = sscanf( buffer, "%23s %63s", osmkey, osmvalue );
// Lines that yield no field (e.g. blank lines) are skipped.
638 if ( fields <= 0 ) continue;
642 fprintf( stderr, "Error partition file\n");
// Statement name "partition_insert_<key>_<value>"; +2 covers the '_'
// separator and the terminating NUL.
// NOTE(review): the malloc result is used without a NULL check.
645 partionQueryName = malloc(strlen("partition_insert_")+strlen(osmkey)+strlen(osmvalue)+2);
646 strcpy(partionQueryName, "partition_insert_");
647 strcat(partionQueryName, osmkey);
648 strcat(partionQueryName, "_");
649 strcat(partionQueryName, osmvalue);
// The table name is assembled from the file's key and value, so the
// partition tags file is trusted input.
651 strcpy(partionQuerySQL, "insert into place_classtype_");
652 strcat(partionQuerySQL, osmkey);
653 strcat(partionQuerySQL, "_");
654 strcat(partionQuerySQL, osmvalue);
655 strcat(partionQuerySQL, " (place_id, centroid) values ($1, ST_Centroid(st_setsrid($2, 4326)))");
657 res = PQprepare(conn, partionQueryName, partionQuerySQL, 2, NULL);
658 if (PQresultStatus(res) != PGRES_COMMAND_OK)
660 fprintf(stderr, "Failed to prepare %s: %s\n", partionQueryName, PQerrorMessage(conn));
// The hash keeps the malloc'ed name as its payload.
664 xmlHashAddEntry2(partionTableTagsHash, BAD_CAST osmkey, BAD_CAST osmvalue, BAD_CAST partionQueryName);
// Same again for the matching delete statement.
666 partionQueryName = malloc(strlen("partition_delete_")+strlen(osmkey)+strlen(osmvalue)+2);
667 strcpy(partionQueryName, "partition_delete_");
668 strcat(partionQueryName, osmkey);
669 strcat(partionQueryName, "_");
670 strcat(partionQueryName, osmvalue);
672 strcpy(partionQuerySQL, "delete from place_classtype_");
673 strcat(partionQuerySQL, osmkey);
674 strcat(partionQuerySQL, "_");
675 strcat(partionQuerySQL, osmvalue);
676 strcat(partionQuerySQL, " where place_id = $1::integer");
678 res = PQprepare(conn, partionQueryName, partionQuerySQL, 1, NULL);
679 if (PQresultStatus(res) != PGRES_COMMAND_OK)
681 fprintf(stderr, "Failed to prepare %s: %s\n", partionQueryName, PQerrorMessage(conn));
685 xmlHashAddEntry2(partionTableTagsHashDelete, BAD_CAST osmkey, BAD_CAST osmvalue, BAD_CAST partionQueryName);
// Fixed statements used by EndElement().
688 res = PQprepare(conn, "get_new_place_id",
689 "select nextval('seq_place')",
691 if (PQresultStatus(res) != PGRES_COMMAND_OK)
693 fprintf(stderr, "Failed to prepare get_new_place_id: %s\n", PQerrorMessage(conn));
// NOTE(review): no visible line executes "get_place_id" - confirm whether it
// is still needed.
697 res = PQprepare(conn, "get_place_id",
698 "select place_id from placex where osm_type = $1 and osm_id = $2 and class = $3 and type = $4",
700 if (PQresultStatus(res) != PGRES_COMMAND_OK)
702 fprintf(stderr, "Failed to prepare get_place_id: %s\n", PQerrorMessage(conn));
// Parameter order matches the paramValues[0..12] assignments in EndElement().
706 res = PQprepare(conn, "placex_insert",
707 "insert into placex (place_id,osm_type,osm_id,class,type,name,extratags,parent_place_id,admin_level,housenumber,rank_address,rank_search,geometry) "
708 "values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, st_setsrid($13, 4326))",
710 if (PQresultStatus(res) != PGRES_COMMAND_OK)
712 fprintf(stderr, "Failed to prepare placex_insert: %s\n", PQerrorMessage(conn));
// NOTE(review): the column list is (search_rank, address_rank) but the select
// supplies (rank_address, rank_search) in that order - confirm this mapping
// is intended.
716 res = PQprepare(conn, "search_name_insert",
717 "insert into search_name (place_id, search_rank, address_rank, country_code, name_vector, nameaddress_vector, centroid) "
718 "select place_id, rank_address, rank_search, country_code, make_keywords(name), "
719 "(select uniq(sort(array_agg(name_vector))) from place_addressline join search_name on "
720 "(address_place_id = search_name.place_id) where place_addressline.place_id = $1 ), st_centroid(geometry) from placex "
721 "where place_id = $1",
723 if (PQresultStatus(res) != PGRES_COMMAND_OK)
725 fprintf(stderr, "Failed to prepare search_name_insert: %s\n", PQerrorMessage(conn));
// $1 place id, $2 distance, $3..$6 identify the address place in placex,
// $7 isaddress.
729 res = PQprepare(conn, "place_addressline_insert",
730 "insert into place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) "
731 "select $1, place_id, false, $7, $2, rank_address from placex where osm_type = $3 and osm_id = $4 and class = $5 and type = $6",
733 if (PQresultStatus(res) != PGRES_COMMAND_OK)
735 fprintf(stderr, "Failed to prepare place_addressline_insert: %s\n", PQerrorMessage(conn));
739 res = PQprepare(conn, "placex_delete",
740 "delete from placex where place_id = $1",
742 if (PQresultStatus(res) != PGRES_COMMAND_OK)
744 fprintf(stderr, "Failed to prepare placex_delete: %s\n", PQerrorMessage(conn));
748 res = PQprepare(conn, "search_name_delete",
749 "delete from search_name where place_id = $1",
751 if (PQresultStatus(res) != PGRES_COMMAND_OK)
753 fprintf(stderr, "Failed to prepare search_name_delete: %s\n", PQerrorMessage(conn));
757 res = PQprepare(conn, "place_addressline_delete",
758 "delete from place_addressline where place_id = $1",
760 if (PQresultStatus(res) != PGRES_COMMAND_OK)
762 fprintf(stderr, "Failed to prepare place_addressline_delete: %s\n", PQerrorMessage(conn));
// Parse the input document node by node.
768 reader = inputUTF8(filename);
772 fprintf(stderr, "Unable to open %s\n", filename);
776 ret = xmlTextReaderRead(reader);
780 ret = xmlTextReaderRead(reader);
784 fprintf(stderr, "%s : failed to parse\n", filename);
// A NULL deallocator means xmlHashFree() releases only the tables; the
// malloc'ed statement names stored as payloads are not freed here.
788 xmlFreeTextReader(reader);
789 xmlHashFree(partionTableTagsHash, NULL);
790 xmlHashFree(partionTableTagsHashDelete, NULL);