Libparserutils
Data Structures | Macros | Typedefs | Functions
inputstream.h File Reference
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <parserutils/errors.h>
#include <parserutils/functypes.h>
#include <parserutils/types.h>
#include <parserutils/charset/utf8.h>
#include <parserutils/utils/buffer.h>

Go to the source code of this file.

Data Structures

struct  parserutils_inputstream
 Input stream object. More...
 

Macros

#define IS_ASCII(x)   (((x) & 0x80) == 0)
 

Typedefs

typedef parserutils_error(* parserutils_charset_detect_func) (const uint8_t *data, size_t len, uint16_t *mibenum, uint32_t *source)
 Type of charset detection function.
 
typedef struct parserutils_inputstream parserutils_inputstream
 Input stream object.
 

Functions

parserutils_error parserutils_inputstream_create (const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream)
 Create an input stream.
 
parserutils_error parserutils_inputstream_destroy (parserutils_inputstream *stream)
 Destroy an input stream.
 
parserutils_error parserutils_inputstream_append (parserutils_inputstream *stream, const uint8_t *data, size_t len)
 Append data to an input stream.
 
parserutils_error parserutils_inputstream_insert (parserutils_inputstream *stream, const uint8_t *data, size_t len)
 Insert data into stream at current location.
 
parserutils_error parserutils_inputstream_peek_slow (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
 Look at the character in the stream that starts at offset bytes from the cursor (slow version)
 
static parserutils_error parserutils_inputstream_peek (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
 Look at the character in the stream that starts at offset bytes from the cursor.
 
static void parserutils_inputstream_advance (parserutils_inputstream *stream, size_t bytes)
 Advance the stream's current position.
 
const char * parserutils_inputstream_read_charset (parserutils_inputstream *stream, uint32_t *source)
 Read the source charset of the input stream.
 
parserutils_error parserutils_inputstream_change_charset (parserutils_inputstream *stream, const char *enc, uint32_t source)
 Change the source charset of the input stream.
 

Macro Definition Documentation

◆ IS_ASCII

#define IS_ASCII (   x)    (((x) & 0x80) == 0)

Typedef Documentation

◆ parserutils_charset_detect_func

typedef parserutils_error(* parserutils_charset_detect_func) (const uint8_t *data, size_t len, uint16_t *mibenum, uint32_t *source)

Type of charset detection function.

Definition at line 32 of file inputstream.h.

◆ parserutils_inputstream

Input stream object.

Function Documentation

◆ parserutils_inputstream_advance()

static void parserutils_inputstream_advance ( parserutils_inputstream *  stream,
size_t  bytes 
)
inline static

Advance the stream's current position.

Parameters
stream: The stream whose position to advance
bytes: The number of bytes to advance

Definition at line 151 of file inputstream.h.

References parserutils_inputstream::cursor, parserutils_buffer::length, and parserutils_inputstream::utf8.

◆ parserutils_inputstream_append()

parserutils_error parserutils_inputstream_append ( parserutils_inputstream *  stream,
const uint8_t *  data,
size_t  len 
)

Append data to an input stream.

Parameters
stream: Input stream to append data to
data: Data to append (in document charset), or NULL to flag EOF
len: Length, in bytes, of data
Returns
PARSERUTILS_OK on success, appropriate error otherwise

Definition at line 169 of file inputstream.c.

References parserutils_inputstream::had_eof, len, PARSERUTILS_BADPARM, parserutils_buffer_append(), PARSERUTILS_OK, parserutils_inputstream_private::public, and parserutils_inputstream_private::raw.

◆ parserutils_inputstream_change_charset()

parserutils_error parserutils_inputstream_change_charset ( parserutils_inputstream *  stream,
const char *  enc,
uint32_t  source 
)

Change the source charset of the input stream.

Parameters
stream: Input stream to modify
enc: Charset name
source: Charset source identifier
Returns
PARSERUTILS_OK on success, PARSERUTILS_BADPARM on invalid parameters, PARSERUTILS_INVALID if called after data has been read from stream, PARSERUTILS_BADENCODING if the encoding is unsupported, PARSERUTILS_NOMEM on memory exhaustion.

Definition at line 321 of file inputstream.c.

References parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_INVALID, and PARSERUTILS_OK.

◆ parserutils_inputstream_create()

parserutils_error parserutils_inputstream_create ( const char *  enc,
uint32_t  encsrc,
parserutils_charset_detect_func  csdetect,
parserutils_inputstream **  stream 
)

Create an input stream.

Parameters
enc: Document charset, or NULL to autodetect
encsrc: Value for encoding source, if specified, or 0
csdetect: Charset detection function, or NULL
stream: Pointer to location to receive stream instance
Returns
PARSERUTILS_OK on success, PARSERUTILS_BADPARM on bad parameters, PARSERUTILS_NOMEM on memory exhaustion, PARSERUTILS_BADENCODING on unsupported encoding

The value 0 is defined as being the lowest priority encoding source (i.e. the default fallback encoding). Beyond this, no further interpretation is made upon the encoding source.

Definition at line 59 of file inputstream.c.

References parserutils_inputstream_private::csdetect, parserutils_inputstream::cursor, parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream::had_eof, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_create(), parserutils__filter_destroy(), parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_buffer_create(), parserutils_buffer_destroy(), parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_NOMEM, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.

◆ parserutils_inputstream_destroy()

parserutils_error parserutils_inputstream_destroy ( parserutils_inputstream *  stream)

Destroy an input stream.

Parameters
stream: Input stream to destroy
Returns
PARSERUTILS_OK on success, appropriate error otherwise

Definition at line 144 of file inputstream.c.

References parserutils_inputstream_private::input, parserutils__filter_destroy(), PARSERUTILS_BADPARM, parserutils_buffer_destroy(), PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.

◆ parserutils_inputstream_insert()

parserutils_error parserutils_inputstream_insert ( parserutils_inputstream *  stream,
const uint8_t *  data,
size_t  len 
)

Insert data into stream at current location.

Parameters
stream: Input stream to insert into
data: Data to insert (UTF-8 encoded)
len: Length, in bytes, of data
Returns
PARSERUTILS_OK on success, appropriate error otherwise

Definition at line 195 of file inputstream.c.

References parserutils_inputstream::cursor, len, PARSERUTILS_BADPARM, parserutils_buffer_insert(), parserutils_inputstream_private::public, and parserutils_inputstream::utf8.

◆ parserutils_inputstream_peek()

static parserutils_error parserutils_inputstream_peek ( parserutils_inputstream *  stream,
size_t  offset,
const uint8_t **  ptr,
size_t *  length 
)
inline static

Look at the character in the stream that starts at offset bytes from the cursor.

Parameters
stream: Stream to look in
offset: Byte offset of start of character
ptr: Pointer to location to receive pointer to character data
length: Pointer to location to receive character length (in bytes)
Returns
PARSERUTILS_OK on success, _NEEDDATA on reaching the end of available input, _EOF on reaching the end of all input, _BADENCODING if the input cannot be decoded, _NOMEM on memory exhaustion, _BADPARM if bad parameters are passed.

Once the character pointed to by the result of this call has been advanced past (i.e. parserutils_inputstream_advance has caused the stream cursor to pass over the character), then no guarantee is made as to the validity of the data pointed to. Thus, any attempt to dereference the pointer after advancing past the data it points to is a bug.

Definition at line 91 of file inputstream.h.

References parserutils_inputstream::cursor, parserutils_buffer::data, IS_ASCII, len, parserutils_buffer::length, PARSERUTILS_BADPARM, parserutils_buffer_randomise(), parserutils_charset_utf8_char_byte_length(), parserutils_inputstream_peek_slow(), PARSERUTILS_NEEDDATA, PARSERUTILS_OK, and parserutils_inputstream::utf8.

◆ parserutils_inputstream_peek_slow()

parserutils_error parserutils_inputstream_peek_slow ( parserutils_inputstream *  stream,
size_t  offset,
const uint8_t **  ptr,
size_t *  length 
)

Look at the character in the stream that starts at offset bytes from the cursor (slow version)

Parameters
stream: Stream to look in
offset: Byte offset of start of character
ptr: Pointer to location to receive pointer to character data
length: Pointer to location to receive character length (in bytes)
Returns
PARSERUTILS_OK on success, _NEEDDATA on reaching the end of available input, _EOF on reaching the end of all input, _BADENCODING if the input cannot be decoded, _NOMEM on memory exhaustion, _BADPARM if bad parameters are passed.

Once the character pointed to by the result of this call has been advanced past (i.e. parserutils_inputstream_advance has caused the stream cursor to pass over the character), then no guarantee is made as to the validity of the data pointed to. Thus, any attempt to dereference the pointer after advancing past the data it points to is a bug.

Definition at line 232 of file inputstream.c.

References parserutils_inputstream::cursor, parserutils_buffer::data, parserutils_inputstream::had_eof, IS_ASCII, len, parserutils_buffer::length, PARSERUTILS_BADPARM, parserutils_charset_utf8_char_byte_length(), PARSERUTILS_EOF, parserutils_inputstream_refill_buffer(), PARSERUTILS_NEEDDATA, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.

Referenced by parserutils_inputstream_peek().

◆ parserutils_inputstream_read_charset()

const char * parserutils_inputstream_read_charset ( parserutils_inputstream *  stream,
uint32_t *  source 
)

Read the source charset of the input stream.

Parameters
stream: Input stream to query
source: Pointer to location to receive charset source identifier
Returns
Pointer to charset name (constant; do not free)

Definition at line 292 of file inputstream.c.

References parserutils_inputstream_private::encsrc, parserutils_inputstream_private::mibenum, and parserutils_charset_mibenum_to_name().