Libparserutils
filter.c
Go to the documentation of this file.
1/*
2 * This file is part of LibParserUtils.
3 * Licensed under the MIT License,
4 * http://www.opensource.org/licenses/mit-license.php
5 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
6 */
7
8#include <errno.h>
9#include <stdbool.h>
10#include <stdlib.h>
11#include <string.h>
12
13#ifndef WITHOUT_ICONV_FILTER
14#include <iconv.h>
15#endif
16
19
20#include "input/filter.h"
21#include "utils/utils.h"
22
/*
 * Members of the input filter object.
 * NOTE(review): the line that opens the struct is absent from this
 * listing; only the member declarations are visible.
 */
25#ifndef WITHOUT_ICONV_FILTER
26 iconv_t cd; /* Iconv conversion descriptor; (iconv_t) -1 while not open */
27 uint16_t int_enc; /* The internal encoding; 0 is rejected by the create routine */
28#else
29 parserutils_charset_codec *read_codec; /* Codec used for the decode step of chunk processing */
30 parserutils_charset_codec *write_codec; /* Codec used for the encode step of chunk processing */
32 uint32_t pivot_buf[64]; /* Intermediate buffer that decode fills and encode drains */
34 bool leftover; /* True when an encode call failed with pivot data still pending */
35 uint8_t *pivot_left; /* Position of the pending pivot data, NULL when none */
36 size_t pivot_len; /* Byte count of the pending pivot data */
37#endif
38
39 struct {
40 uint16_t encoding; /* Input encoding as a MIB enum value; 0 until one has been set */
42};
43
46 const char *enc);
47
/*
 * Create an input filter: parserutils__filter_create(const char *int_enc,
 * parserutils_filter **filter).
 * NOTE(review): the opening line of the signature and numbered lines
 * 61-62, 65, 73, 77 and 95 are absent from this listing, so the local
 * declarations and some return statements cannot be seen here.
 *
 * int_enc: name of the internal encoding; checked against NULL.
 * filter:  receives the new filter on success; checked against NULL.
 * Visible results: PARSERUTILS_NOMEM if malloc fails, the error reported
 * by filter_set_defaults or parserutils_charset_codec_create if either
 * fails, otherwise PARSERUTILS_OK.
 */
59 parserutils_filter **filter)
60{
63
64 if (int_enc == NULL || filter == NULL)
66
67 f = malloc(sizeof(parserutils_filter));
68 if (f == NULL)
69 return PARSERUTILS_NOMEM;
70
71#ifndef WITHOUT_ICONV_FILTER
/* Start with the descriptor marked as not open; other routines in this
 * file test for this value before calling iconv_close. */
72 f->cd = (iconv_t) -1;
74 int_enc, strlen(int_enc));
/* A zero int_enc is handled as a failure and the filter is released. */
75 if (f->int_enc == 0) {
76 free(f);
78 }
79#else
/* A fresh filter has no pivot data pending. */
80 f->leftover = false;
81 f->pivot_left = NULL;
82 f->pivot_len = 0;
83#endif
84
/* Apply the default settings; in this file that selects UTF-8 as the
 * input encoding. */
85 error = filter_set_defaults(f);
86 if (error != PARSERUTILS_OK) {
87 free(f);
88 return error;
89 }
90
91#ifdef WITHOUT_ICONV_FILTER
/* The write codec is created from the internal encoding name. If that
 * fails, the read codec set up by the defaults step is dropped first
 * (presumably destroyed on the missing line 95 - confirm) and then the
 * filter itself is freed. */
92 error = parserutils_charset_codec_create(int_enc, &f->write_codec);
93 if (error != PARSERUTILS_OK) {
94 if (f->read_codec != NULL) {
96 f->read_codec = NULL;
97 }
98 free(f);
99 return error;
100 }
101#endif
102
/* Hand the finished filter to the caller only after every step succeeded. */
103 *filter = f;
104
105 return PARSERUTILS_OK;
106}
107
/*
 * Destroy an input filter: body of
 * parserutils__filter_destroy(parserutils_filter *input).
 * NOTE(review): the signature line is absent from this listing.
 *
 * Returns PARSERUTILS_BADPARM when input is NULL. Otherwise releases the
 * converter resources of the active build variant, frees the filter and
 * returns PARSERUTILS_OK; input must not be used after that.
 */
115{
116 if (input == NULL)
117 return PARSERUTILS_BADPARM;
118
119#ifndef WITHOUT_ICONV_FILTER
/* Close the descriptor only if one is actually open. */
120 if (input->cd != (iconv_t) -1) {
121 iconv_close(input->cd);
122 input->cd = (iconv_t) -1;
123 }
124#else
/* Each codec is destroyed independently; either may be NULL. */
125 if (input->read_codec != NULL) {
126 parserutils_charset_codec_destroy(input->read_codec);
127 input->read_codec = NULL;
128 }
129
130 if (input->write_codec != NULL) {
131 parserutils_charset_codec_destroy(input->write_codec);
132 input->write_codec = NULL;
133 }
134#endif
135
136 free(input);
137
138 return PARSERUTILS_OK;
139}
140
/*
 * Configure an input filter: body of
 * parserutils__filter_setopt(parserutils_filter *input,
 * parserutils_filter_opttype type, parserutils_filter_optparams *params).
 * NOTE(review): the signature, the declaration of error (numbered line
 * 153) and the case label (numbered line 159) are absent from this listing.
 *
 * Returns PARSERUTILS_BADPARM when input or params is NULL; otherwise
 * returns whatever error holds after the switch.
 */
152{
154
155 if (input == NULL || params == NULL)
156 return PARSERUTILS_BADPARM;
157
158 switch (type) {
/* Switch the input encoding to the name carried in params. */
160 error = filter_set_encoding(input, params->encoding.name);
161 break;
162 }
163
/* NOTE(review): no default case is visible, so the value returned for an
 * unhandled type depends on how error was initialised - confirm. */
164 return error;
165}
166
/*
 * Process a chunk of data: remainder of
 * parserutils__filter_process_chunk(parserutils_filter *input,
 * const uint8_t **data, size_t *len, uint8_t **output, size_t *outlen).
 * NOTE(review): the opening line of the signature and numbered lines
 * 230, 242 and 271 are absent from this listing.
 *
 * Returns PARSERUTILS_BADPARM if any pointer argument, or the pointer
 * held in data or output, is NULL. Both build variants pass data, len,
 * output and outlen by address to the underlying converter, which
 * presumably advances them as bytes are consumed and produced - confirm
 * against the iconv and codec documentation.
 */
180 const uint8_t **data, size_t *len,
181 uint8_t **output, size_t *outlen)
182{
183 if (input == NULL || data == NULL || *data == NULL || len == NULL ||
184 output == NULL || *output == NULL || outlen == NULL)
185 return PARSERUTILS_BADPARM;
186
187#ifndef WITHOUT_ICONV_FILTER
/* iconv variant. Only two errno values have a case in the switch below;
 * any other value falls out of the switch and reaches the return of
 * PARSERUTILS_OK at the end of this variant. */
188 if (iconv(input->cd, (void *) data, len,
189 (char **) output, outlen) == (size_t) -1) {
190 switch (errno) {
191 case E2BIG:
/* Output buffer exhausted; reported to the caller as NOMEM. */
192 return PARSERUTILS_NOMEM;
193 case EILSEQ:
/* Invalid input sequence: a three byte replacement is written, so
 * that much room is required first. */
194 if (*outlen < 3)
195 return PARSERUTILS_NOMEM;
196
/* 0xef 0xbf 0xbd is the UTF-8 form of U+FFFD REPLACEMENT CHARACTER.
 * NOTE(review): this presumably relies on the output encoding being
 * UTF-8 - confirm. */
197 (*output)[0] = 0xef;
198 (*output)[1] = 0xbf;
199 (*output)[2] = 0xbd;
200
201 *output += 3;
202 *outlen -= 3;
203
/* Skip exactly one offending input byte. */
204 (*data)++;
205 (*len)--;
206
/* Keep converting; every further EILSEQ gets the same treatment
 * (one replacement written, one input byte skipped). The loop stops
 * on success, on any other error, or when the input is used up. */
207 while (*len > 0) {
208 size_t ret;
209
210 ret = iconv(input->cd, (void *) data, len,
211 (char **) output, outlen);
212 if (ret != (size_t) -1 || errno != EILSEQ)
213 break;
214
215 if (*outlen < 3)
216 return PARSERUTILS_NOMEM;
217
218 (*output)[0] = 0xef;
219 (*output)[1] = 0xbf;
220 (*output)[2] = 0xbd;
221
222 *output += 3;
223 *outlen -= 3;
224
225 (*data)++;
226 (*len)--;
227 }
228
/* E2BIG maps to NOMEM; the other arm of this conditional is on the
 * missing numbered line 230. */
229 return errno == E2BIG ? PARSERUTILS_NOMEM
231 }
232 }
233
234 return PARSERUTILS_OK;
235#else
/* Codec variant: input is decoded into the pivot buffer and the pivot
 * content is then encoded into the output. The codec index on this page
 * describes the pivot representation as UCS-4. */
236 if (input->leftover) {
237 parserutils_error write_error;
238
239 /* Some data left to be written from last call */
240
241 /* Attempt to flush the remaining data. */
/* (The call that assigns write_error starts on the missing numbered
 * line 242; its arguments follow.) */
243 input->write_codec,
244 (const uint8_t **) &input->pivot_left,
245 &input->pivot_len,
246 output, outlen);
247
/* On failure the leftover state stays set, so the next call starts by
 * attempting this flush again. */
248 if (write_error != PARSERUTILS_OK)
249 return write_error;
250
251
252 /* And clear leftover */
253 input->pivot_left = NULL;
254 input->pivot_len = 0;
255 input->leftover = false;
256 }
257
258 while (*len > 0) {
259 parserutils_error read_error, write_error;
/* Capacity of the pivot buffer in bytes, and a cursor at its start. */
260 size_t pivot_len = sizeof(input->pivot_buf);
261 uint8_t *pivot = (uint8_t *) input->pivot_buf;
262
263 read_error = parserutils_charset_codec_decode(input->read_codec,
264 data, len,
265 (uint8_t **) &pivot, &pivot_len);
266
/* Rewind the cursor and turn pivot_len into the number of bytes the
 * decode step produced; this treats the value left in pivot_len by
 * the decode call as the unused space. */
267 pivot = (uint8_t *) input->pivot_buf;
268 pivot_len = sizeof(input->pivot_buf) - pivot_len;
269
270 if (pivot_len > 0) {
/* (The call that assigns write_error starts on the missing numbered
 * line 271; its arguments follow.) */
272 input->write_codec,
273 (const uint8_t **) &pivot,
274 &pivot_len,
275 output, outlen);
276
/* Remember where the encode step stopped so that the next call can
 * flush the rest before decoding more input. */
277 if (write_error != PARSERUTILS_OK) {
278 input->leftover = true;
279 input->pivot_left = pivot;
280 input->pivot_len = pivot_len;
281
282 return write_error;
283 }
284 }
285
/* A NOMEM result from the decode step does not end the loop,
 * presumably because it only signals a full pivot buffer, which has
 * just been drained - confirm. Any other read error is returned. */
286 if (read_error != PARSERUTILS_OK &&
287 read_error != PARSERUTILS_NOMEM)
288 return read_error;
289 }
290
291 return PARSERUTILS_OK;
292#endif
293}
294
/*
 * Reset the state of an input filter: body of
 * parserutils__filter_reset(parserutils_filter *input).
 * NOTE(review): the signature and the declaration of error (numbered
 * line 303) are absent from this listing.
 *
 * Returns PARSERUTILS_BADPARM when input is NULL. In the codec variant
 * the first failing codec reset is returned to the caller.
 */
302{
304
305 if (input == NULL)
306 return PARSERUTILS_BADPARM;
307
308#ifndef WITHOUT_ICONV_FILTER
/* A call with no input and no output buffer asks iconv to return the
 * descriptor to its initial conversion state.
 * NOTE(review): the return value is ignored here, so the value returned
 * below is whatever error was initialised with - confirm. */
309 iconv(input->cd, NULL, 0, NULL, 0);
310#else
311 /* Clear pivot buffer leftovers */
312 input->pivot_left = NULL;
313 input->pivot_len = 0;
314 input->leftover = false;
315
316 /* Reset read codec */
317 error = parserutils_charset_codec_reset(input->read_codec);
318 if (error != PARSERUTILS_OK)
319 return error;
320
321 /* Reset write codec */
322 error = parserutils_charset_codec_reset(input->write_codec);
323 if (error != PARSERUTILS_OK)
324 return error;
325#endif
326
327 return error;
328}
329
/*
 * Set the default settings of an input filter: body of the static helper
 * filter_set_defaults(parserutils_filter *input).
 * NOTE(review): the signature line is absent from this listing.
 *
 * Returns PARSERUTILS_BADPARM when input is NULL, the error from
 * filter_set_encoding if selecting UTF-8 fails, otherwise PARSERUTILS_OK.
 */
337{
338 parserutils_error error;
339
340 if (input == NULL)
341 return PARSERUTILS_BADPARM;
342
343#ifdef WITHOUT_ICONV_FILTER
/* Clear both codec pointers so later NULL checks on them are meaningful. */
344 input->read_codec = NULL;
345 input->write_codec = NULL;
346#endif
347
/* Zero means no encoding selected, so the call below cannot take the
 * early exit for an unchanged encoding. */
348 input->settings.encoding = 0;
349 error = filter_set_encoding(input, "UTF-8");
350 if (error != PARSERUTILS_OK)
351 return error;
352
353 return PARSERUTILS_OK;
354}
355
/*
 * Set the input encoding of a filter: remainder of the static helper
 * filter_set_encoding(parserutils_filter *input, const char *enc).
 * NOTE(review): the opening line of the signature and numbered lines
 * 366, 374, 387-388 and 391 are absent from this listing (declaration of
 * error, the return for an unknown name, the iconv_open arguments and the
 * second arm of the failure conditional).
 *
 * Returns PARSERUTILS_BADPARM when input or enc is NULL and
 * PARSERUTILS_OK immediately when enc resolves to the encoding already
 * in use. Otherwise the converter for the input side is replaced.
 *
 * NOTE(review): on the failure paths below the previous descriptor or
 * read codec has already been released, yet settings.encoding keeps its
 * old value; a later call naming the old encoding would then take the
 * early exit and report success without a working converter - confirm
 * whether settings.encoding should be cleared on failure.
 */
364 const char *enc)
365{
367 uint16_t mibenum;
368
369 if (input == NULL || enc == NULL)
370 return PARSERUTILS_BADPARM;
371
/* Resolve the name to its MIB enum value; zero is handled as a failure
 * (the return statement is on the missing numbered line 374). */
372 mibenum = parserutils_charset_mibenum_from_name(enc, strlen(enc));
373 if (mibenum == 0)
375
376 /* Exit early if we're already using this encoding */
377 if (input->settings.encoding == mibenum)
378 return PARSERUTILS_OK;
379
380#ifndef WITHOUT_ICONV_FILTER
/* Drop any open descriptor before opening one for the new encoding. */
381 if (input->cd != (iconv_t) -1) {
382 iconv_close(input->cd);
383 input->cd = (iconv_t) -1;
384 }
385
386 input->cd = iconv_open(
/* EINVAL from iconv_open is reported as an unsupported encoding. */
389 if (input->cd == (iconv_t) -1) {
390 return (errno == EINVAL) ? PARSERUTILS_BADENCODING
392 }
393#else
/* Replace the read codec; the write codec is not touched here. */
394 if (input->read_codec != NULL) {
395 parserutils_charset_codec_destroy(input->read_codec);
396 input->read_codec = NULL;
397 }
398
399 error = parserutils_charset_codec_create(enc, &input->read_codec);
400 if (error != PARSERUTILS_OK)
401 return error;
402#endif
403
/* Record the new encoding only after the converter was set up. */
404 input->settings.encoding = mibenum;
405
406 return error;
407
408}
parserutils_error parserutils_charset_codec_encode(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Encode a chunk of UCS-4 data into a codec's charset.
Definition codec.c:136
parserutils_error parserutils_charset_codec_destroy(parserutils_charset_codec *codec)
Destroy a charset codec.
Definition codec.c:86
parserutils_error parserutils_charset_codec_decode(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Decode a chunk of data in a codec's charset into UCS-4.
Definition codec.c:163
parserutils_error parserutils_charset_codec_reset(parserutils_charset_codec *codec)
Clear a charset codec's encoding state.
Definition codec.c:182
parserutils_error parserutils_charset_codec_create(const char *charset, parserutils_charset_codec **codec)
Create a charset codec.
Definition codec.c:38
size_t len
Definition codec_8859.c:23
parserutils_error
Definition errors.h:18
@ PARSERUTILS_OK
Definition errors.h:19
@ PARSERUTILS_BADENCODING
Definition errors.h:26
@ PARSERUTILS_NOMEM
Definition errors.h:21
@ PARSERUTILS_BADPARM
Definition errors.h:22
parserutils_error parserutils__filter_reset(parserutils_filter *input)
Reset an input filter's state.
Definition filter.c:301
parserutils_error parserutils__filter_process_chunk(parserutils_filter *input, const uint8_t **data, size_t *len, uint8_t **output, size_t *outlen)
Process a chunk of data.
Definition filter.c:179
parserutils_error parserutils__filter_create(const char *int_enc, parserutils_filter **filter)
Create an input filter.
Definition filter.c:58
parserutils_error parserutils__filter_destroy(parserutils_filter *input)
Destroy an input filter.
Definition filter.c:114
static parserutils_error filter_set_defaults(parserutils_filter *input)
Set an input filter's default settings.
Definition filter.c:336
static parserutils_error filter_set_encoding(parserutils_filter *input, const char *enc)
Set an input filter's encoding.
Definition filter.c:363
parserutils_error parserutils__filter_setopt(parserutils_filter *input, parserutils_filter_opttype type, parserutils_filter_optparams *params)
Configure an input filter.
Definition filter.c:149
parserutils_filter_opttype
Input filter option types.
Definition filter.h:21
@ PARSERUTILS_FILTER_SET_ENCODING
Definition filter.h:22
uint16_t parserutils_charset_mibenum_from_name(const char *alias, size_t len)
Retrieve the MIB enum value assigned to an encoding name.
Definition aliases.c:107
const char * parserutils_charset_mibenum_to_name(uint16_t mibenum)
Retrieve the canonical name of an encoding from the MIB enum.
Definition aliases.c:127
Core charset codec definition; implementations extend this.
Definition codec_impl.h:19
Input filter.
Definition filter.c:24
uint16_t encoding
Input encoding.
Definition filter.c:40
uint16_t int_enc
The internal encoding.
Definition filter.c:27
struct parserutils_filter::(anonymous struct) settings
Filter settings.
iconv_t cd
Iconv conversion descriptor.
Definition filter.c:26
Input filter option parameters.
Definition filter.h:28
const char * name
Encoding name.
Definition filter.h:32
struct parserutils_filter_optparams::(anonymous struct) encoding
Parameters for encoding setting.