8 #include <libxml/xmlstring.h>
9 #include <libxml/xmlreader.h>
10 #include <libxml/hash.h>
12 #include "nominatim.h"
// Input document formats the importer recognises. fileType starts out as
// FILETYPE_NONE; StartElement selects FILETYPE_STRUCTUREDV0P1 for an
// "osmStructured" element whose "version" attribute parses as 0.1.
16 typedef enum { FILETYPE_NONE, FILETYPE_STRUCTUREDV0P1 } filetypes_t;
// Operation applied to the features being read. StartElement sets it from the
// "add", "update" and "delete" elements.
17 typedef enum { FILEMODE_NONE, FILEMODE_ADD, FILEMODE_UPDATE, FILEMODE_DELETE } filemodes_t;
// Capacities of the file-scope buffers declared further down in this file:
//   MAX_FEATUREADDRESS        - entries in featureAddress[]
//   MAX_FEATURENAMES          - entries in featureName[]
//   MAX_FEATUREEXTRATAGS      - entries in featureExtraTag[]
//   MAX_FEATURENAMESTRING     - bytes in featureNameString[]
//   MAX_FEATUREEXTRATAGSTRING - bytes in featureExtraTagString[]
19 #define MAX_FEATUREADDRESS 500
20 #define MAX_FEATURENAMES 1000
21 #define MAX_FEATUREEXTRATAGS 100
22 #define MAX_FEATURENAMESTRING 100000
23 #define MAX_FEATUREEXTRATAGSTRING 50000
// NOTE(review): only part of this declaration is visible in this excerpt.
// The three xmlChar* members below may belong to a different struct whose
// header is not shown: the code in this file assigns feature.countryCode and
// feature.houseNumber, and stores an atoi() result in
// featureAddress[].rankAddress. Confirm the real layout against the full file
// before relying on it.
25 struct feature_address {
47 xmlChar * rankAddress;
49 xmlChar * countryCode;
51 xmlChar * houseNumber;
// Parser state shared between StartElement and EndElement.
// fileType: detected document format (a filetypes_t value).
// fileMode: current operation (a filemodes_t value); defaults to add.
55 int fileType = FILETYPE_NONE;
56 int fileMode = FILEMODE_ADD;
// Accumulators for the feature currently being parsed. StartElement fills
// them while reading child elements; EndElement reads them when the closing
// "feature" tag arrives.
58 struct feature_address featureAddress[MAX_FEATUREADDRESS];
59 struct feature_tag featureName[MAX_FEATURENAMES];
60 struct feature_tag featureExtraTag[MAX_FEATUREEXTRATAGS];
61 struct feature feature;
// Number of used entries in featureAddress[], featureName[] and
// featureExtraTag[] respectively.
62 int featureAddressLines = 0;
63 int featureNameLines = 0;
64 int featureExtraTagLines = 0;
// Two-key hash (feature key, feature value) -> name of the prepared statement
// that inserts into the matching partition table. Populated in
// nominatim_import, looked up in EndElement.
66 xmlHashTablePtr partionTableTagsHash;
// Scratch buffers in which EndElement assembles the comma-separated
// "type"=>"value" lists passed as the name / extratags query parameters.
67 char featureNameString[MAX_FEATURENAMESTRING];
68 char featureExtraTagString[MAX_FEATUREEXTRATAGSTRING];
// Handles an opening XML element.
//
// reader: libxml2 text reader positioned on the element; used to fetch
//         attributes and element text.
// name:   element name as reported by the reader.
//
// Depending on the element name this detects the document type, switches the
// import mode, starts a new feature, or appends a name / extra tag / address
// line to the file-scope accumulators. Strings obtained from
// xmlTextReaderGetAttribute / xmlTextReaderReadString are stored as-is in
// those accumulators; EndElement frees them with xmlFree.
70 void StartElement(xmlTextReaderPtr reader, const xmlChar *name)
// Until a document type has been recognised, only the root element is examined.
76 if (fileType == FILETYPE_NONE)
78 // Potential to handle other file types in the future / versions
79 if (xmlStrEqual(name, BAD_CAST "osmStructured"))
// NOTE(review): the attribute value goes straight into strtof; no NULL check
// (missing "version" attribute) or xmlFree of the returned string is visible
// in this excerpt - confirm.
81 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "version");
82 version = strtof(value, NULL);
// NOTE(review): exact floating-point equality; this matches only because both
// sides are rounded to float.
85 if (version == (float)0.1)
87 fileType = FILETYPE_STRUCTUREDV0P1;
88 fileMode = FILEMODE_ADD;
92 fprintf( stderr, "Unknown osmStructured version %f\n", version );
98 fprintf( stderr, "Unknown XML document type: %s\n", name );
// Mode-switching container elements.
104 if (xmlStrEqual(name, BAD_CAST "add"))
106 fileMode = FILEMODE_ADD;
109 if (xmlStrEqual(name, BAD_CAST "update"))
111 fileMode = FILEMODE_UPDATE;
114 if (xmlStrEqual(name, BAD_CAST "delete"))
116 fileMode = FILEMODE_DELETE;
119 if (fileMode == FILEMODE_NONE)
121 fprintf( stderr, "Unknown import mode in: %s\n", name );
// Start of a new feature: capture its identifying attributes and reset the
// per-feature fields and counters.
125 if (xmlStrEqual(name, BAD_CAST "feature"))
127 feature.type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
128 feature.id = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
129 feature.key = xmlTextReaderGetAttribute(reader, BAD_CAST "key");
130 feature.value = xmlTextReaderGetAttribute(reader, BAD_CAST "value");
// The "rank" attribute feeds rankAddress and "importance" feeds rankSearch.
131 feature.rankAddress = xmlTextReaderGetAttribute(reader, BAD_CAST "rank");
132 feature.rankSearch = xmlTextReaderGetAttribute(reader, BAD_CAST "importance");
// Optional child values start as NULL so EndElement can tell whether they
// were supplied.
134 feature.countryCode = NULL;
135 feature.adminLevel = NULL;
136 feature.houseNumber = NULL;
137 feature.geometry = NULL;
// NOTE(review): no reset of featureExtraTagLines is visible here - confirm it
// is cleared per feature in the full file.
138 featureAddressLines = 0;
139 featureNameLines = 0;
// "names" is only a container; each "name" child contributes one entry.
143 if (xmlStrEqual(name, BAD_CAST "names")) return;
144 if (xmlStrEqual(name, BAD_CAST "name"))
146 featureName[featureNameLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
147 featureName[featureNameLines].value = xmlTextReaderReadString(reader);
// NOTE(review): no featureNameLines++ is visible between the store above and
// this bounds check (the "tag" branch below does increment its counter) -
// confirm the increment exists in the full file.
149 if (featureNameLines >= MAX_FEATURENAMES)
151 fprintf( stderr, "Too many name elements\n");
// "tags" is only a container; each "tag" child contributes one extra tag.
156 if (xmlStrEqual(name, BAD_CAST "tags")) return;
157 if (xmlStrEqual(name, BAD_CAST "tag"))
159 featureExtraTag[featureExtraTagLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
160 featureExtraTag[featureExtraTagLines].value = xmlTextReaderReadString(reader);
161 featureExtraTagLines++;
162 if (featureExtraTagLines >= MAX_FEATUREEXTRATAGS)
164 fprintf( stderr, "Too many extra tag elements\n");
// Simple text children stored directly on the current feature.
169 if (xmlStrEqual(name, BAD_CAST "osmGeometry"))
171 feature.geometry = xmlTextReaderReadString(reader);
174 if (xmlStrEqual(name, BAD_CAST "adminLevel"))
176 feature.adminLevel = xmlTextReaderReadString(reader);
179 if (xmlStrEqual(name, BAD_CAST "countryCode"))
181 feature.countryCode = xmlTextReaderReadString(reader);
184 if (xmlStrEqual(name, BAD_CAST "houseNumber"))
186 feature.houseNumber = xmlTextReaderReadString(reader);
// "address" opens the list of address lines for this feature.
189 if (xmlStrEqual(name, BAD_CAST "address"))
191 featureAddressLines = 0;
// Recognised address-line element names. The bodies of these branches are
// not visible in this excerpt.
195 if (xmlStrEqual(name, BAD_CAST "continent"))
199 else if (xmlStrEqual(name, BAD_CAST "sea"))
203 else if (xmlStrEqual(name, BAD_CAST "country"))
207 else if (xmlStrEqual(name, BAD_CAST "state"))
211 else if (xmlStrEqual(name, BAD_CAST "county"))
215 else if (xmlStrEqual(name, BAD_CAST "city"))
219 else if (xmlStrEqual(name, BAD_CAST "town"))
223 else if (xmlStrEqual(name, BAD_CAST "village"))
227 else if (xmlStrEqual(name, BAD_CAST "unknown"))
231 else if (xmlStrEqual(name, BAD_CAST "suburb"))
235 else if (xmlStrEqual(name, BAD_CAST "postcode"))
239 else if (xmlStrEqual(name, BAD_CAST "neighborhood"))
243 else if (xmlStrEqual(name, BAD_CAST "street"))
247 else if (xmlStrEqual(name, BAD_CAST "access"))
251 else if (xmlStrEqual(name, BAD_CAST "building"))
255 else if (xmlStrEqual(name, BAD_CAST "other"))
// Attributes of the address line; "rank" is converted with atoi.
// NOTE(review): no xmlFree of the temporary attribute strings read into
// `value` is visible in this excerpt - confirm.
261 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "rank");
264 fprintf( stderr, "Address element missing rank\n");
267 featureAddress[featureAddressLines].rankAddress = atoi(value);
270 value = (char*)xmlTextReaderGetAttribute(reader, BAD_CAST "isaddress");
// NOTE(review): this message is printed next to the "isaddress" lookup but
// still says "rank" - looks like a copy/paste slip.
273 fprintf( stderr, "Address element missing rank\n");
// isAddress is normalised to the one-character string "t" or "f".
276 if (*value == 't') strcpy(featureAddress[featureAddressLines].isAddress, "t");
277 else strcpy(featureAddress[featureAddressLines].isAddress, "f");
280 featureAddress[featureAddressLines].type = xmlTextReaderGetAttribute(reader, BAD_CAST "type");
281 featureAddress[featureAddressLines].id = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
282 featureAddress[featureAddressLines].key = xmlTextReaderGetAttribute(reader, BAD_CAST "key");
283 featureAddress[featureAddressLines].value = xmlTextReaderGetAttribute(reader, BAD_CAST "value");
284 featureAddress[featureAddressLines].distance = xmlTextReaderGetAttribute(reader, BAD_CAST "distance");
286 featureAddressLines++;
287 if (featureAddressLines >= MAX_FEATUREADDRESS)
289 fprintf( stderr, "Too many address elements\n");
// Anything not matched above is reported.
295 fprintf(stderr, "%s: Unknown element name: %s\n", __FUNCTION__, name);
// Handles a closing XML element (processNode also calls it directly for
// self-closing elements).
//
// reader: libxml2 text reader (not referenced in the visible part of the body).
// name:   element name as reported by the reader.
//
// When a "feature" element closes, this obtains a place_id, runs the delete
// and/or insert prepared statements selected by fileMode, and finally frees
// the libxml2 strings that StartElement collected for the feature.
298 void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
301 PGresult * resPlaceID;
// NOTE(review): 11 slots are declared, but the placex insert below writes
// paramValues[11] and passes 12 parameters - that is one element past the end
// of this array. The array needs at least 12 entries.
302 const char * paramValues[11];
304 char * partionQueryName;
305 int i, namePos, lineTypeLen, lineValueLen;
307 if (xmlStrEqual(name, BAD_CAST "feature"))
// Progress message every 1000 features.
310 if (featureCount % 1000 == 0) printf("feature %i(k)\n", featureCount/1000);
// New features get a fresh id from the sequence-backed statement.
312 if (fileMode == FILEMODE_ADD)
314 resPlaceID = PQexecPrepared(conn, "get_new_place_id", 0, NULL, NULL, NULL, 0);
315 if (PQresultStatus(resPlaceID) != PGRES_TUPLES_OK)
317 fprintf(stderr, "get_place_id: INSERT failed: %s", PQerrorMessage(conn));
// Lookup of an existing place by (type, id, key, value).
// NOTE(review): this executes "get_new_place_id" with four parameters, but in
// nominatim_import that statement is prepared as a bare nextval() query; the
// four-parameter lookup is prepared under the name "get_place_id". This looks
// like the wrong statement name.
324 paramValues[0] = (const char *)feature.type;
325 paramValues[1] = (const char *)feature.id;
326 paramValues[2] = (const char *)feature.key;
327 paramValues[3] = (const char *)feature.value;
328 resPlaceID = PQexecPrepared(conn, "get_new_place_id", 4, paramValues, NULL, NULL, 0);
329 if (PQresultStatus(resPlaceID) != PGRES_TUPLES_OK)
331 fprintf(stderr, "index_placex: INSERT failed: %s", PQerrorMessage(conn));
// place_id points into resPlaceID's storage.
// NOTE(review): no PQclear of resPlaceID or res is visible in this excerpt,
// and no check that the lookup returned a row - confirm.
336 place_id = PQgetvalue(resPlaceID, 0, 0);
// Update and delete both begin by removing the existing rows for the place.
338 if (fileMode == FILEMODE_UPDATE || fileMode == FILEMODE_DELETE)
340 paramValues[0] = (const char *)place_id;
341 res = PQexecPrepared(conn, "placex_delete", 1, paramValues, NULL, NULL, 0);
342 if (PQresultStatus(res) != PGRES_COMMAND_OK)
344 fprintf(stderr, "placex_delete: DELETE failed: %s", PQerrorMessage(conn));
350 res = PQexecPrepared(conn, "search_name_delete", 1, paramValues, NULL, NULL, 0);
351 if (PQresultStatus(res) != PGRES_COMMAND_OK)
353 fprintf(stderr, "search_name_delete: DELETE failed: %s", PQerrorMessage(conn));
359 res = PQexecPrepared(conn, "place_addressline_delete", 1, paramValues, NULL, NULL, 0);
360 if (PQresultStatus(res) != PGRES_COMMAND_OK)
362 fprintf(stderr, "place_addressline_delete: DELETE failed: %s", PQerrorMessage(conn));
// Update and add both (re)insert the feature.
369 if (fileMode == FILEMODE_UPDATE || fileMode == FILEMODE_ADD)
371 // Insert into placex
372 paramValues[0] = (const char *)place_id;
373 paramValues[1] = (const char *)feature.type;
374 paramValues[2] = (const char *)feature.id;
375 paramValues[3] = (const char *)feature.key;
376 paramValues[4] = (const char *)feature.value;
// Build the name list as "type"=>"value","type"=>"value",... in
// featureNameString.
378 featureNameString[0] = 0;
379 if (featureNameLines)
384 for(i = 0; i < featureNameLines; i++)
// NOTE(review): strlen is applied to the stored attribute/text pointers
// without a NULL check; a "name" element lacking a "type" attribute would
// presumably leave .type NULL - confirm.
386 lineTypeLen = strlen(BAD_CAST featureName[i].type);
387 lineValueLen = strlen(BAD_CAST featureName[i].value);
// The 7 covers the separator comma, the four quote characters and "=>".
// NOTE(review): the terminating NUL is not counted, so when the sum equals
// the buffer size exactly the terminator lands one byte past the buffer.
388 if (namePos+lineTypeLen+lineValueLen+7 > MAX_FEATURENAMESTRING)
390 fprintf(stderr, "feature name too long: %s", (const char *)featureName[i].value);
// NOTE(review): type and value are copied verbatim; embedded quote or
// backslash characters are not escaped.
393 if (namePos) strcpy(featureNameString+(namePos++), ",");
394 strcpy(featureNameString+(namePos++), "\"");
395 strcpy(featureNameString+namePos, BAD_CAST featureName[i].type);
396 namePos += lineTypeLen;
// NOTE(review): no advance of namePos past this four-character separator is
// visible in this excerpt - confirm it exists in the full file.
397 strcpy(featureNameString+namePos, "\"=>\"");
399 strcpy(featureNameString+namePos, BAD_CAST featureName[i].value);
400 namePos += lineValueLen;
401 strcpy(featureNameString+(namePos++), "\"");
404 paramValues[5] = (const char *)featureNameString;
// Same construction for the extra tags, into featureExtraTagString.
406 featureExtraTagString[0] = 0;
407 if (featureExtraTagLines)
412 for(i = 0; i < featureExtraTagLines; i++)
414 lineTypeLen = strlen(BAD_CAST featureExtraTag[i].type);
415 lineValueLen = strlen(BAD_CAST featureExtraTag[i].value);
416 if (namePos+lineTypeLen+lineValueLen+7 > MAX_FEATUREEXTRATAGSTRING)
418 fprintf(stderr, "feature extra tag too long: %s", (const char *)featureExtraTag[i].value);
421 if (namePos) strcpy(featureExtraTagString+(namePos++),",");
422 strcpy(featureExtraTagString+(namePos++), "\"");
423 strcpy(featureExtraTagString+namePos, BAD_CAST featureExtraTag[i].type);
424 namePos += lineTypeLen;
425 strcpy(featureExtraTagString+namePos, "\"=>\"");
427 strcpy(featureExtraTagString+namePos, BAD_CAST featureExtraTag[i].value);
428 namePos += lineValueLen;
429 strcpy(featureExtraTagString+(namePos++), "\"");
432 paramValues[6] = (const char *)featureExtraTagString;
// Remaining placex columns; NULL pointers are passed through unchanged.
434 paramValues[7] = (const char *)feature.adminLevel;
435 paramValues[8] = (const char *)feature.houseNumber;
436 paramValues[9] = (const char *)feature.rankAddress;
437 paramValues[10] = (const char *)feature.rankSearch;
// NOTE(review): index 11 is outside the 11-element paramValues array.
438 paramValues[11] = (const char *)feature.geometry;
439 res = PQexecPrepared(conn, "placex_insert", 12, paramValues, NULL, NULL, 0);
440 if (PQresultStatus(res) != PGRES_COMMAND_OK)
442 fprintf(stderr, "index_placex: INSERT failed: %s", PQerrorMessage(conn));
// One place_addressline row per collected address line.
448 for(i = 0; i < featureAddressLines; i++)
450 // insert into place_address
451 paramValues[0] = (const char *)place_id;
452 paramValues[1] = (const char *)featureAddress[i].distance;
453 paramValues[2] = (const char *)featureAddress[i].type;
454 paramValues[3] = (const char *)featureAddress[i].id;
455 paramValues[4] = (const char *)featureAddress[i].key;
456 paramValues[5] = (const char *)featureAddress[i].value;
457 paramValues[6] = (const char *)featureAddress[i].isAddress;
458 res = PQexecPrepared(conn, "place_addressline_insert", 7, paramValues, NULL, NULL, 0);
459 if (PQresultStatus(res) != PGRES_COMMAND_OK)
461 fprintf(stderr, "place_addressline_insert: INSERT failed: %s", PQerrorMessage(conn));
// NOTE(review): these same five pointers are freed again in the loop further
// down; unless the two loops sit on mutually exclusive paths (not
// determinable from this excerpt) that is a double free.
467 xmlFree(featureAddress[i].type);
468 xmlFree(featureAddress[i].id);
469 xmlFree(featureAddress[i].key);
470 xmlFree(featureAddress[i].value);
471 xmlFree(featureAddress[i].distance);
// The search_name row is only generated when the feature has names.
474 if (featureNameLines)
476 paramValues[0] = (const char *)place_id;
477 res = PQexecPrepared(conn, "search_name_insert", 1, paramValues, NULL, NULL, 0);
478 if (PQresultStatus(res) != PGRES_COMMAND_OK)
480 fprintf(stderr, "search_name_insert: INSERT failed: %s", PQerrorMessage(conn));
// If the (key, value) pair has a registered partition statement, also insert
// the place into that partition table.
487 partionQueryName = xmlHashLookup2(partionTableTagsHash, feature.key, feature.value);
488 if (partionQueryName)
490 // insert into partition table
491 paramValues[0] = (const char *)place_id;
492 paramValues[1] = (const char *)feature.geometry;
493 res = PQexecPrepared(conn, partionQueryName, 2, paramValues, NULL, NULL, 0);
494 if (PQresultStatus(res) != PGRES_COMMAND_OK)
496 fprintf(stderr, "%s: INSERT failed: %s", partionQueryName, PQerrorMessage(conn));
// Release the address-line strings collected by StartElement.
507 for(i = 0; i < featureAddressLines; i++)
509 xmlFree(featureAddress[i].type);
510 xmlFree(featureAddress[i].id);
511 xmlFree(featureAddress[i].key);
512 xmlFree(featureAddress[i].value);
513 xmlFree(featureAddress[i].distance);
// Release the feature's own strings. The optional ones are only freed when
// they were set.
// NOTE(review): no xmlFree calls for featureName[] / featureExtraTag[]
// entries are visible in this excerpt - confirm they are released somewhere.
517 xmlFree(feature.type);
519 xmlFree(feature.key);
520 xmlFree(feature.value);
521 xmlFree(feature.rankAddress);
522 xmlFree(feature.rankSearch);
523 // if (feature.name) xmlFree(feature.name);
524 if (feature.countryCode) xmlFree(feature.countryCode);
525 if (feature.adminLevel) xmlFree(feature.adminLevel);
526 if (feature.houseNumber) xmlFree(feature.houseNumber);
527 if (feature.geometry) xmlFree(feature.geometry);
// Dispatches the reader's current node to StartElement / EndElement.
//
// reader: libxml2 text reader positioned on the node to process.
//
// Element nodes go to StartElement, end-element nodes to EndElement; text,
// CDATA and significant-whitespace nodes have their own case labels, and any
// other node type is reported on stderr.
533 static void processNode(xmlTextReaderPtr reader)
536 name = xmlTextReaderName(reader);
// Placeholder name used in place of the reader's value (the condition
// guarding this line is not visible in this excerpt - presumably a NULL name).
539 name = xmlStrdup(BAD_CAST "--");
542 switch(xmlTextReaderNodeType(reader))
544 case XML_READER_TYPE_ELEMENT:
545 StartElement(reader, name);
546 if (xmlTextReaderIsEmptyElement(reader))
547 EndElement(reader, name); /* No end_element for self closing tags! */
549 case XML_READER_TYPE_END_ELEMENT:
550 EndElement(reader, name);
552 case XML_READER_TYPE_TEXT:
553 case XML_READER_TYPE_CDATA:
554 case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
558 fprintf(stderr, "Unknown node type %d\n", xmlTextReaderNodeType(reader));
// Imports a structured XML file into the database.
//
// conninfo:            libpq connection string passed to PQconnectdb.
// partionTagsFilename: text file with one "key value" pair per line; each
//                      pair gets its own prepared partition insert statement.
// filename:            XML input, opened through inputUTF8.
//
// Steps: connect, load the partition tag file into partionTableTagsHash,
// prepare the statements that EndElement executes by name, then read the
// document node by node. The return value is not visible in this excerpt.
565 int nominatim_import(const char *conninfo, const char *partionTagsFilename, const char *filename)
567 xmlTextReaderPtr reader;
570 FILE * partionTagsFile;
571 char * partionQueryName;
572 char partionQuerySQL[1024];
574 conn = PQconnectdb(conninfo);
575 if (PQstatus(conn) != CONNECTION_OK)
577 fprintf(stderr, "Connection to database failed: %s\n", PQerrorMessage(conn));
581 partionTableTagsHash = xmlHashCreate(200);
// NOTE(review): no fclose of partionTagsFile is visible in this excerpt -
// confirm the file is closed after the loop.
583 partionTagsFile = fopen(partionTagsFilename, "rt");
584 if (!partionTagsFile)
586 fprintf(stderr, "Unable to read partition tags file: %s\n", partionTagsFilename);
590 char buffer[1024], osmkey[256], osmvalue[256];
592 while(fgets(buffer, sizeof(buffer), partionTagsFile) != NULL)
// Field widths (23 and 63 characters) are well inside the 256-byte buffers.
594 fields = sscanf( buffer, "%23s %63s", osmkey, osmvalue );
// Lines with no fields (blank lines) are skipped.
596 if( fields <= 0 ) continue;
600 fprintf( stderr, "Error partition file\n");
// Statement name is "partition_insert_<key>_<value>"; the +2 covers the '_'
// separator and the terminating NUL.
// NOTE(review): the malloc result is used without a NULL check.
603 partionQueryName = malloc(strlen("partition_insert_")+strlen(osmkey)+strlen(osmvalue)+2);
604 strcpy(partionQueryName, "partition_insert_");
605 strcat(partionQueryName, osmkey);
606 strcat(partionQueryName, "_");
607 strcat(partionQueryName, osmvalue);
// Target table is place_classtype_<key>_<value>.
// NOTE(review): key and value from the file are spliced into the SQL text as
// an unquoted identifier; this assumes the partition tags file is trusted.
609 strcpy(partionQuerySQL, "insert into place_classtype_");
610 strcat(partionQuerySQL, osmkey);
611 strcat(partionQuerySQL, "_");
612 strcat(partionQuerySQL, osmvalue);
613 strcat(partionQuerySQL, " (place_id, centroid) values ($1, ST_Centroid(st_setsrid($2, 4326)))");
615 res = PQprepare(conn, partionQueryName, partionQuerySQL, 2, NULL);
616 if (PQresultStatus(res) != PGRES_COMMAND_OK)
618 fprintf(stderr, "Failed to prepare %s: %s\n", partionQueryName, PQerrorMessage(conn));
// The hash stores the malloc'ed statement name as its payload.
622 xmlHashAddEntry2(partionTableTagsHash, BAD_CAST osmkey, BAD_CAST osmvalue, BAD_CAST partionQueryName);
// Prepared statements executed by name from EndElement.
// Allocates a new place id from the seq_place sequence (no parameters).
625 res = PQprepare(conn, "get_new_place_id",
626 "select nextval('seq_place')",
628 if (PQresultStatus(res) != PGRES_COMMAND_OK)
630 fprintf(stderr, "Failed to prepare get_new_place_id: %s\n", PQerrorMessage(conn));
// Looks up an existing place id by (osm_type, osm_id, class, type).
634 res = PQprepare(conn, "get_place_id",
635 "select place_id from placex where osm_type = $1 and osm_id = $2 and class = $3 and type = $4",
637 if (PQresultStatus(res) != PGRES_COMMAND_OK)
639 fprintf(stderr, "Failed to prepare get_place_id: %s\n", PQerrorMessage(conn));
// Inserts one placex row; takes 12 parameters, the last being the geometry.
643 res = PQprepare(conn, "placex_insert",
644 "insert into placex (place_id,osm_type,osm_id,class,type,name,extratags,admin_level,housenumber,rank_address,rank_search,geometry) "
645 "values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, st_setsrid($12, 4326))",
647 if (PQresultStatus(res) != PGRES_COMMAND_OK)
649 fprintf(stderr, "Failed to prepare placex_insert: %s\n", PQerrorMessage(conn));
// Derives the search_name row for a place from its placex and
// place_addressline rows.
// NOTE(review): the target columns are listed as (search_rank, address_rank)
// but the select supplies (rank_address, rank_search) in that order - the two
// ranks look swapped; confirm against the schema.
653 res = PQprepare(conn, "search_name_insert",
654 "insert into search_name (place_id, search_rank, address_rank, country_code, name_vector, nameaddress_vector, centroid) "
655 "select place_id, rank_address, rank_search, country_code, make_keywords(name), "
656 "(select uniq(sort(array_agg(name_vector))) from place_addressline join search_name on "
657 "(address_place_id = search_name.place_id) where place_addressline.place_id = $1 ), st_centroid(geometry) from placex "
658 "where place_id = $1",
660 if (PQresultStatus(res) != PGRES_COMMAND_OK)
662 fprintf(stderr, "Failed to prepare search_name_insert: %s\n", PQerrorMessage(conn));
// Inserts one address line, resolving the referenced place through placex.
666 res = PQprepare(conn, "place_addressline_insert",
667 "insert into place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) "
668 "select $1, place_id, false, $7, $2, rank_address from placex where osm_type = $3 and osm_id = $4 and class = $5 and type = $6",
670 if (PQresultStatus(res) != PGRES_COMMAND_OK)
672 fprintf(stderr, "Failed to prepare place_addressline_insert: %s\n", PQerrorMessage(conn));
// Delete statements used for update and delete modes, each keyed by place_id.
676 res = PQprepare(conn, "placex_delete",
677 "delete from placex where place_id = $1",
679 if (PQresultStatus(res) != PGRES_COMMAND_OK)
681 fprintf(stderr, "Failed to prepare placex_delete: %s\n", PQerrorMessage(conn));
685 res = PQprepare(conn, "search_name_delete",
686 "delete from search_name where place_id = $1",
688 if (PQresultStatus(res) != PGRES_COMMAND_OK)
690 fprintf(stderr, "Failed to prepare search_name_delete: %s\n", PQerrorMessage(conn));
694 res = PQprepare(conn, "place_addressline_delete",
695 "delete from place_addressline where place_id = $1",
697 if (PQresultStatus(res) != PGRES_COMMAND_OK)
699 fprintf(stderr, "Failed to prepare place_addressline_delete: %s\n", PQerrorMessage(conn));
// Open the XML input and read it node by node (the loop body that processes
// each node is not visible in this excerpt).
705 reader = inputUTF8(filename);
709 fprintf(stderr, "Unable to open %s\n", filename);
713 ret = xmlTextReaderRead(reader);
717 ret = xmlTextReaderRead(reader);
720 fprintf(stderr, "%s : failed to parse\n", filename);
724 xmlFreeTextReader(reader);
// NOTE(review): a NULL deallocator is passed, so the malloc'ed statement
// names stored as hash payloads are presumably not freed here - confirm.
725 xmlHashFree(partionTableTagsHash, NULL);