Libparserutils
|
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <parserutils/errors.h>
#include <parserutils/functypes.h>
#include <parserutils/types.h>
#include <parserutils/charset/utf8.h>
#include <parserutils/utils/buffer.h>
Go to the source code of this file.
Data Structures | |
struct | parserutils_inputstream |
Input stream object. More... | |
Macros | |
#define | IS_ASCII(x) |
Typedefs | |
typedef parserutils_error(* | parserutils_charset_detect_func) (const uint8_t *data, size_t len, uint16_t *mibenum, uint32_t *source) |
Type of charset detection function. | |
typedef struct parserutils_inputstream | parserutils_inputstream |
Input stream object. | |
Functions | |
parserutils_error | parserutils_inputstream_create (const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream) |
Create an input stream. | |
parserutils_error | parserutils_inputstream_destroy (parserutils_inputstream *stream) |
Destroy an input stream. | |
parserutils_error | parserutils_inputstream_append (parserutils_inputstream *stream, const uint8_t *data, size_t len) |
Append data to an input stream. | |
parserutils_error | parserutils_inputstream_insert (parserutils_inputstream *stream, const uint8_t *data, size_t len) |
Insert data into stream at current location. | |
parserutils_error | parserutils_inputstream_peek_slow (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length) |
Look at the character in the stream that starts at offset bytes from the cursor (slow version) | |
static parserutils_error | parserutils_inputstream_peek (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length) |
Look at the character in the stream that starts at offset bytes from the cursor. | |
static void | parserutils_inputstream_advance (parserutils_inputstream *stream, size_t bytes) |
Advance the stream's current position. | |
const char * | parserutils_inputstream_read_charset (parserutils_inputstream *stream, uint32_t *source) |
Read the source charset of the input stream. | |
parserutils_error | parserutils_inputstream_change_charset (parserutils_inputstream *stream, const char *enc, uint32_t source) |
Change the source charset of the input stream. | |
#define IS_ASCII(x)
Referenced by parserutils_inputstream_peek().
typedef parserutils_error(* parserutils_charset_detect_func) (const uint8_t *data, size_t len, uint16_t *mibenum, uint32_t *source) |
Type of charset detection function.
Definition at line 32 of file inputstream.h.
typedef struct parserutils_inputstream parserutils_inputstream |
Input stream object.
static inline void parserutils_inputstream_advance (parserutils_inputstream *stream, size_t bytes)
Advance the stream's current position.
stream | The stream whose position to advance |
bytes | The number of bytes to advance |
Definition at line 151 of file inputstream.h.
References parserutils_inputstream::cursor, parserutils_buffer::length, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_append (parserutils_inputstream *stream, const uint8_t *data, size_t len)
Append data to an input stream.
stream | Input stream to append data to |
data | Data to append (in document charset), or NULL to flag EOF |
len | Length, in bytes, of data |
Definition at line 169 of file inputstream.c.
References parserutils_inputstream::had_eof, len, PARSERUTILS_BADPARM, parserutils_buffer_append(), PARSERUTILS_OK, parserutils_inputstream_private::public, and parserutils_inputstream_private::raw.
parserutils_error parserutils_inputstream_change_charset (parserutils_inputstream *stream, const char *enc, uint32_t source)
Change the source charset of the input stream.
stream | Input stream to modify |
enc | Charset name |
source | Charset source identifier |
Definition at line 321 of file inputstream.c.
References parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_INVALID, and PARSERUTILS_OK.
parserutils_error parserutils_inputstream_create (const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream)
Create an input stream.
enc | Document charset, or NULL to autodetect |
encsrc | Value for encoding source, if specified, or 0 |
csdetect | Charset detection function, or NULL |
stream | Pointer to location to receive stream instance |
The value 0 is defined as the lowest-priority encoding source (i.e. the default fallback encoding). Beyond this, the library places no further interpretation on the encoding source value.
Definition at line 59 of file inputstream.c.
References parserutils_inputstream_private::csdetect, parserutils_inputstream::cursor, parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream::had_eof, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_create(), parserutils__filter_destroy(), parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_buffer_create(), parserutils_buffer_destroy(), parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_NOMEM, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_destroy (parserutils_inputstream *stream)
Destroy an input stream.
stream | Input stream to destroy |
Definition at line 144 of file inputstream.c.
References parserutils_inputstream_private::input, parserutils__filter_destroy(), PARSERUTILS_BADPARM, parserutils_buffer_destroy(), PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_insert (parserutils_inputstream *stream, const uint8_t *data, size_t len)
Insert data into stream at current location.
stream | Input stream to insert into |
data | Data to insert (UTF-8 encoded) |
len | Length, in bytes, of data |
Definition at line 195 of file inputstream.c.
References parserutils_inputstream::cursor, len, PARSERUTILS_BADPARM, parserutils_buffer_insert(), parserutils_inputstream_private::public, and parserutils_inputstream::utf8.
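static inline parserutils_error parserutils_inputstream_peek (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)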
Look at the character in the stream that starts at offset bytes from the cursor.
stream | Stream to look in |
offset | Byte offset of start of character |
ptr | Pointer to location to receive pointer to character data |
length | Pointer to location to receive character length (in bytes) |
Once the character pointed to by the result of this call has been advanced past (i.e. parserutils_inputstream_advance has caused the stream cursor to pass over the character), then no guarantee is made as to the validity of the data pointed to. Thus, any attempt to dereference the pointer after advancing past the data it points to is a bug.
Definition at line 91 of file inputstream.h.
References parserutils_inputstream::cursor, parserutils_buffer::data, IS_ASCII, len, parserutils_buffer::length, PARSERUTILS_BADPARM, parserutils_buffer_randomise(), parserutils_charset_utf8_char_byte_length(), parserutils_inputstream_peek_slow(), PARSERUTILS_NEEDDATA, PARSERUTILS_OK, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_peek_slow (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
Look at the character in the stream that starts at offset bytes from the cursor (slow version)
stream | Stream to look in |
offset | Byte offset of start of character |
ptr | Pointer to location to receive pointer to character data |
length | Pointer to location to receive character length (in bytes) |
Once the character pointed to by the result of this call has been advanced past (i.e. parserutils_inputstream_advance has caused the stream cursor to pass over the character), then no guarantee is made as to the validity of the data pointed to. Thus, any attempt to dereference the pointer after advancing past the data it points to is a bug.
Definition at line 232 of file inputstream.c.
References parserutils_inputstream::cursor, parserutils_buffer::data, parserutils_inputstream::had_eof, IS_ASCII, len, parserutils_buffer::length, PARSERUTILS_BADPARM, parserutils_charset_utf8_char_byte_length(), PARSERUTILS_EOF, parserutils_inputstream_refill_buffer(), PARSERUTILS_NEEDDATA, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
Referenced by parserutils_inputstream_peek().
const char * parserutils_inputstream_read_charset (parserutils_inputstream *stream, uint32_t *source)
Read the source charset of the input stream.
stream | Input stream to query |
source | Pointer to location to receive charset source identifier |
Definition at line 292 of file inputstream.c.
References parserutils_inputstream_private::encsrc, parserutils_inputstream_private::mibenum, and parserutils_charset_mibenum_to_name().