1 #define _FILE_OFFSET_BITS 64
2 #define _LARGEFILE64_SOURCE
11 #include <sys/types.h>
17 #include <libxml/xmlreader.h>
// kind of handle stored in fileHandle; inputOpen sets it from the file
// name extension (".gz", ".bz2", anything else is a plain file).
24 enum { plainFile, gzipFile, bzip2File } type;
26 // needed by bzip2 when decompressing from multiple streams. other
27 // decompressors must ignore it.
// buffer cursor fields (separate from the bzip2 note above): inputGetChar
// returns buf[buf_ptr++] and refills buf when buf_ptr reaches buf_fill,
// which holds the value returned by the last readFile call.
31 int buf_ptr, buf_fill;
34 // tries to re-open the bz stream at the next stream start.
35 // returns 0 on success, -1 on failure.
//
// ctx:   input whose fileHandle is the current bzip2 reader and whose
//        systemHandle is the stream handed to BZ2_bzReadOpen.
// error: receives the bzlib status of each bzlib call made here.
36 int bzReOpen(struct Input *ctx, int *error) {
37 // for copying out the last unused part of the block which
38 // has an EOS token in it. needed for re-initialising the
40 unsigned char unused[BZ_MAX_UNUSED];
41 void *unused_tmp_ptr = NULL;
// ask the finished reader for the bytes it holds but did not decode;
// unused_tmp_ptr is pointed at them and nUnused receives their count.
44 BZ2_bzReadGetUnused(error, (BZFILE *)(ctx->fileHandle), &unused_tmp_ptr, &nUnused);
45 if (*error != BZ_OK) return -1;
47 // when bzReadClose is called the unused buffer is deallocated,
48 // so it needs to be copied somewhere safe first.
49 for (i = 0; i < nUnused; ++i)
50 unused[i] = ((unsigned char *)unused_tmp_ptr)[i];
// NOTE(review): if this close reports an error we return with
// ctx->fileHandle still holding the handle just passed to BZ2_bzReadClose;
// confirm callers do not use or close it again.
52 BZ2_bzReadClose(error, (BZFILE *)(ctx->fileHandle));
53 if (*error != BZ_OK) return -1;
55 // reassign the file handle
// the saved bytes are passed back in so the new reader starts with them
// before reading further from systemHandle.
56 ctx->fileHandle = BZ2_bzReadOpen(error, ctx->systemHandle, 0, 0, unused, nUnused);
57 if (ctx->fileHandle == NULL || *error != BZ_OK) return -1;
// Reads up to len bytes of input into buffer using the reader that matches
// the handle: read() for plain files, gzread() for gzip, BZ2_bzRead() for
// bzip2.
//
// context: the struct Input for this file (inputUTF8 passes the pointer
//          returned by inputOpen and registers this function as the read
//          callback of xmlReaderForIO).
// buffer:  destination for the data.
// len:     maximum number of bytes to store in buffer.
//
// The return statements are not among the lines shown here; inputGetChar
// stores the result as its buffer fill count and checks it for 0 and for
// negative values.
62 int readFile(void *context, char * buffer, int len)
64 struct Input *ctx = context;
65 void *f = ctx->fileHandle;
// nothing to read once end of input has been flagged, or for an empty request.
68 if (ctx->eof || (len == 0))
// plain file: fileHandle points at an int holding the file descriptor.
73 l = read(*(int *)f, buffer, len);
// a zero or negative result flags end of input, so later calls stop early.
74 if (l <= 0) ctx->eof = 1;
// gzip file: fileHandle is the gzFile itself.
77 l = gzread((gzFile)f, buffer, len);
78 if (l <= 0) ctx->eof = 1;
// bzip2 file: the bzlib status is delivered separately through error.
81 l = BZ2_bzRead(&error, (BZFILE *)f, buffer, len);
83 // error codes BZ_OK and BZ_STREAM_END are both "OK", but the stream
84 // end means the reader needs to be reset from the original handle.
86 // for stream errors, try re-opening the stream before admitting defeat.
// taken for any status other than BZ_STREAM_END, or when the re-open
// after BZ_STREAM_END fails.
87 if (error != BZ_STREAM_END || bzReOpen(ctx, &error) != 0) {
94 fprintf(stderr, "Bad file type\n");
// reports both the read result and the last bzlib status on stderr.
99 fprintf(stderr, "File reader received error %d (%d)\n", l, error);
// Returns the next character of the input, served from the buffer kept in
// the context and refilled through readFile when it runs out.
//
// context: the struct Input for this file.
106 char inputGetChar(void *context)
108 struct Input *ctx = context;
// every buffered byte has been handed out: fetch up to sizeof(ctx->buf) more.
110 if (ctx->buf_ptr == ctx->buf_fill) {
111 ctx->buf_fill = readFile(context, &ctx->buf[0], sizeof(ctx->buf));
// readFile delivered no data.
113 if (ctx->buf_fill == 0)
// a negative fill count is reported through perror.
115 if (ctx->buf_fill < 0) {
116 perror("Error while reading file");
120 //readFile(context, &c, 1);
// hand out the current byte and advance the read position.
121 return ctx->buf[ctx->buf_ptr++];
// Returns the eof flag of the input: zero after inputOpen clears the
// context, 1 once readFile has flagged end of input.
//
// context: the struct Input for this file.
124 int inputEof(void *context)
126 return ((struct Input *)context)->eof;
// Opens the named input and sets up a struct Input for it. The reader is
// chosen from the text after the last '.' in name:
//   ".gz"   -> gzopen, type gzipFile
//   ".bz2"  -> fopen + BZ2_bzReadOpen, type bzip2File
//   other   -> open() on a file descriptor, type plainFile ("-" is handled
//              by a separate branch whose body is not shown here)
//
// name: path of the file to open.
//
// The return statements are not among the lines shown here; inputUTF8
// receives the result as an opaque void * context.
129 void *inputOpen(const char *name)
// ext points at the last '.' in name, or is NULL when there is none.
131 const char *ext = strrchr(name, '.');
132 struct Input *ctx = malloc (sizeof(*ctx));
// start with every field zero: eof clear and the character buffer empty.
137 memset(ctx, 0, sizeof(*ctx));
// keep a private copy of the file name in the context.
139 ctx->name = strdup(name);
141 if (ext && !strcmp(ext, ".gz")) {
142 ctx->fileHandle = (void *)gzopen(name, "rb");
143 ctx->type = gzipFile;
144 } else if (ext && !strcmp(ext, ".bz2")) {
// bzlib reads through a stdio stream; it is kept in systemHandle because
// bzReOpen needs it to start a new reader on the same file.
146 ctx->systemHandle = fopen(name, "rb");
147 if (!ctx->systemHandle) {
148 fprintf(stderr, "error while opening file %s\n", name);
152 ctx->fileHandle = (void *)BZ2_bzReadOpen(&error, ctx->systemHandle, 0, 0, NULL, 0);
153 ctx->type = bzip2File;
// plain file: the descriptor lives on the heap so fileHandle can point at it
// (readFile dereferences it as an int).
// NOTE(review): sizeof(pfd) is the size of the pointer, not of the int it
// points to; sizeof(*pfd) looks like what was intended — confirm.
156 int *pfd = malloc(sizeof(pfd));
158 if (!strcmp(name, "-")) {
161 int flags = O_RDONLY;
163 flags |= O_LARGEFILE;
165 *pfd = open(name, flags);
172 ctx->fileHandle = (void *)pfd;
173 ctx->type = plainFile;
// common failure check: a NULL fileHandle means no reader was obtained.
175 if (!ctx->fileHandle) {
176 fprintf(stderr, "error while opening file %s\n", name);
// Closes the handle held by the input. inputUTF8 registers this function as
// the close callback of xmlReaderForIO.
//
// context: the struct Input for this file.
//
// Only the bzip2 close and the fallback message are among the lines shown
// here; the handling of the other file types and the return value are not
// visible.
184 int inputClose(void *context)
186 struct Input *ctx = context;
187 void *f = ctx->fileHandle;
// NOTE(review): the handle was created with BZ2_bzReadOpen, whose documented
// counterpart is BZ2_bzReadClose; confirm that closing it with BZ2_bzclose
// (and what that does to systemHandle) is intended.
198 BZ2_bzclose((BZFILE *)f);
201 fprintf(stderr, "Bad file type\n");
// Builds a libxml2 text reader over the named input. Data is pulled through
// readFile and the input is released through inputClose, both operating on
// the context returned by inputOpen.
//
// name: path of the file to read.
//
// Returns the result of xmlReaderForIO, called with no URL, no encoding and
// no parser options. The condition guarding the failure message is not
// among the lines shown here.
210 xmlTextReaderPtr inputUTF8(const char *name)
212 void *ctx = inputOpen(name);
215 fprintf(stderr, "Input reader create failed for: %s\n", name);
219 return xmlReaderForIO(readFile, inputClose, (void *)ctx, NULL, NULL, 0);