Coverage Report

Created: 2024-05-03 06:05

/builds/xfbs/passgen/src/parser/token.c
Line
Count
Source (jump to first uncovered line)
1
#include "passgen/parser/token.h"
2
3
61
void passgen_token_parser_init(struct passgen_token_parser *token_parser) {
4
61
    token_parser->state = PASSGEN_TOKEN_INIT;
5
61
    token_parser->offset = 0;
6
61
    token_parser->byte_offset = 0;
7
61
}
8
9
263
const char *passgen_token_state_string(enum passgen_token_state state) {
10
263
    switch(state) {
11
1
        case PASSGEN_TOKEN_INIT:
12
1
            return "ready for parsing";
13
208
        case PASSGEN_TOKEN_ESCAPED:
14
208
            return "parsing escaped token";
15
4
        case PASSGEN_TOKEN_UNICODE:
16
4
            return "parsing unicode token";
17
3
        case PASSGEN_TOKEN_UNICODE_PAYLOAD:
18
3
            return "parsing unicode token payload";
19
41
        case PASSGEN_TOKEN_ERROR_UNICODE_START:
20
41
            return "unexpected character while parsing unicode literal";
21
1
        case PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD:
22
1
            return "unexpected character while parsing unicode literal";
23
3
        case PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN:
24
3
            return "unicode literal payload too long";
25
2
        default:
26
2
            return NULL;
27
263
    }
28
263
}
29
30
5.56M
static inline int8_t hex_decode(uint32_t c) {
31
5.56M
    if('0' <= c && 
c <= '9'5.56M
) {
32
3.50M
        return c - '0';
33
3.50M
    }
34
2.06M
35
2.06M
    if('A' <= c && 
c <= 'F'2.06M
) {
36
5
        return 10 + c - 'A';
37
5
    }
38
2.06M
39
2.06M
    if('a' <= c && 
c <= 'f'2.06M
) {
40
2.06M
        return 10 + c - 'a';
41
2.06M
    }
42
10
43
10
    return -1;
44
10
}
45
46
static inline void token_parse_init(
47
    struct passgen_token_parser *parser,
48
    struct passgen_token *token,
49
3.68M
    uint32_t codepoint) {
50
3.68M
    // save position of initial token
51
3.68M
    token->offset = parser->offset;
52
3.68M
    token->byte_offset = parser->byte_offset;
53
3.68M
54
3.68M
    if(codepoint == '\\') {
55
2.23M
        parser->state = PASSGEN_TOKEN_ESCAPED;
56
2.23M
    } else {
57
1.45M
        token->codepoint = codepoint;
58
1.45M
        parser->state = PASSGEN_TOKEN_INIT;
59
1.45M
    }
60
3.68M
}
61
62
// Simple ASCII escape map. Don't use this for large (unicode) codepoints.
63
// Provides efficient O(1) lookup.
64
static const char simple_escaped[] = {
65
    0,
66
    ['a'] = '\a',
67
    ['b'] = '\b',
68
    ['e'] = '\033',
69
    ['f'] = '\f',
70
    ['n'] = '\n',
71
    ['r'] = '\r',
72
    ['t'] = '\t',
73
    ['v'] = '\v',
74
    ['\\'] = '\\'};
75
76
static inline void token_parse_escaped(
77
    struct passgen_token_parser *parser,
78
    struct passgen_token *token,
79
2.23M
    uint32_t codepoint) {
80
2.23M
    // simple_escaped only covers ASCII, whereas codepoint could be much
81
2.23M
    // larger.
82
2.23M
    if(codepoint < sizeof(simple_escaped) && 
simple_escaped[codepoint]1.11M
) {
83
388
        token->codepoint = simple_escaped[codepoint];
84
388
        parser->state = PASSGEN_TOKEN_INIT;
85
388
86
388
        return;
87
388
    }
88
2.23M
89
2.23M
    switch(codepoint) {
90
1.11M
        case 'u':
91
1.11M
            parser->state = PASSGEN_TOKEN_UNICODE;
92
1.11M
            break;
93
1.11M
        default:
94
1.11M
            token->codepoint = codepoint | PASSGEN_TOKEN_ESCAPED_BIT;
95
1.11M
            parser->state = PASSGEN_TOKEN_INIT;
96
2.23M
    }
97
2.23M
}
98
99
static inline void
100
1.11M
token_parse_unicode(struct passgen_token_parser *parser, uint32_t codepoint) {
101
1.11M
    if(codepoint == '{') {
102
1.11M
        parser->state = PASSGEN_TOKEN_UNICODE_PAYLOAD;
103
1.11M
        parser->data.unicode_payload.length = 0;
104
1.11M
        parser->data.unicode_payload.codepoint = 0;
105
1.11M
    } else {
106
51
        parser->state = PASSGEN_TOKEN_ERROR_UNICODE_START;
107
51
    }
108
1.11M
}
109
110
static inline void token_parse_unicode_payload(
111
    struct passgen_token_parser *parser,
112
    struct passgen_token *token,
113
6.68M
    uint32_t codepoint) {
114
6.68M
    // once we read the closing brace, the payload is over and we can emit the
115
6.68M
    // token.
116
6.68M
    if(codepoint == '}') {
117
1.11M
        token->codepoint = parser->data.unicode_payload.codepoint;
118
1.11M
        parser->state = PASSGEN_TOKEN_INIT;
119
1.11M
120
1.11M
        return;
121
1.11M
    }
122
5.56M
123
5.56M
    // keep track of length, make sure it's not too long.
124
5.56M
    parser->data.unicode_payload.length++;
125
5.56M
    if(parser->data.unicode_payload.length > 6) {
126
6
        parser->state = PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN;
127
6
128
6
        return;
129
6
    }
130
5.56M
131
5.56M
    // try to decode the hex value.
132
5.56M
    int8_t decoded = hex_decode(codepoint);
133
5.56M
    if(decoded < 0) {
134
10
        parser->state = PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD;
135
10
136
10
        return;
137
10
    }
138
5.56M
139
5.56M
    parser->data.unicode_payload.codepoint *= 16;
140
5.56M
    parser->data.unicode_payload.codepoint += decoded;
141
5.56M
}
142
143
int passgen_token_parse(
144
    struct passgen_token_parser *parser,
145
    struct passgen_token *token,
146
    uint8_t width,
147
13.7M
    uint32_t codepoint) {
148
13.7M
    switch(parser->state) {
149
3.68M
        case PASSGEN_TOKEN_INIT:
150
3.68M
            token_parse_init(parser, token, codepoint);
151
3.68M
            break;
152
2.23M
        case PASSGEN_TOKEN_ESCAPED:
153
2.23M
            token_parse_escaped(parser, token, codepoint);
154
2.23M
            break;
155
1.11M
        case PASSGEN_TOKEN_UNICODE:
156
1.11M
            token_parse_unicode(parser, codepoint);
157
1.11M
            break;
158
6.68M
        case PASSGEN_TOKEN_UNICODE_PAYLOAD:
159
6.68M
            token_parse_unicode_payload(parser, token, codepoint);
160
6.68M
            break;
161
3
        default:
162
3
            return parser->state;
163
13.7M
    }
164
13.7M
165
13.7M
    // update parser offsets
166
13.7M
    parser->offset += 1;
167
13.7M
    parser->byte_offset += width;
168
13.7M
169
13.7M
    return parser->state;
170
13.7M
}
171
172
254
const char *passgen_token_parse_error_str(int ret) {
173
254
    return passgen_token_state_string(ret);
174
254
}