Coverage Report

Created: 2024-05-03 06:05

/builds/xfbs/passgen/include/passgen/util/utf8.h
Line
Count
Source
1
/// @file utf8.h
2
/// @author Patrick M. Elsen <pelsen@xfbs.net>
3
/// @brief UTF-8 encoding and decoding utilities.
4
///
5
/// Internally, passgen has proper Unicode support (meaning that it works with UTF-32
6
/// codepoints). However, for transport and storage, UTF-8 is more common. Therefore,
7
/// these functions are needed to convert from UTF-8 to Unicode codepoints, and vice
8
/// versa.
9
#pragma once
10
#include <stddef.h>
11
#include <stdint.h>
12
13
2
#define PASSGEN_UTF8_OUTPUT_SIZE 1
14
54.5k
#define PASSGEN_UTF8_SUCCESS 0
15
16
#define PASSGEN_UTF8_INVALID_CHAR -1
16
17
160k
#define PASSGEN_UNICODE_MAX 0x10FFFF
18
19
/// Decodes a UTF-8 character sequence into an output array.
20
///
21
/// Decodes the UTF-8 character sequence in `input` (with length `input_len`) into
22
/// the Unicode codepoint array `output` (with length `output_len`). The number of
23
/// bytes processed from the input is written into the value pointed to by `input_pos`,
24
/// and the number of codepoints written into `output` is written into the value
25
/// pointed to by `output_pos`. If `output_widths` is supplied, then for every decoded
26
/// codepoint, the byte width of the codepoint is written into it.
27
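/// NOTE(review): the prototype below has no `input_pos`/`output_pos` parameters (it takes `output` and `input` as pointers to pointers) — confirm and update.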
28
/// @param output The decoded Unicode output. This must point to a `uint32_t` array
29
///   of size `output_len`.
30
/// @param output_len The length of `output`, and if it is supplied, `output_widths`.
31
/// @param output_pos A pointer to a `size_t` variable containing the current offset
32
///   into the `output` array. The value of that variable should initially be zero.
33
///   At the end, this variable indicates how many Unicode codepoints were written
34
///   into the output.
35
/// @param output_widths A pointer to an array of `uint8_t`, which will contain
36
///   the byte width of every parsed Unicode codepoint. If a `NULL`
37
///   pointer is passed, this behaviour is disabled.
38
/// @param input The UTF-8 input.
39
/// @param input_len The length of the UTF-8 input sequence.
40
/// @param input_pos Pointer to a `size_t` containing the current offset into the
41
///   input array. The value of this variable should initially be set to zero. After
42
///   returning, it will indicate the number of processed bytes from the input.
43
/// @return 0 on success, a positive integer when the output is too small to fit the
44
///   decoded codepoints, and a negative integer on error.
45
int passgen_utf8_decode(
46
    uint32_t **output,
47
    size_t output_len,
48
    uint8_t *output_widths,
49
    const uint8_t **input,
50
    size_t input_len);
51
52
/// Encodes Unicode characters into a UTF-8 character sequence.
53
int passgen_utf8_encode(
54
    uint8_t *out,
55
    size_t out_len,
56
    size_t *out_pos,
57
    const uint32_t *in,
58
    size_t in_len,
59
    size_t *in_pos);
60
61
/// Encode a single codepoint.
62
int passgen_utf8_encode_codepoint(uint8_t *out, uint32_t codepoint);
63
64
/// Given an error (negative return value), returns a string describing it.
65
const char *passgen_utf8_error(int retval);