/builds/xfbs/passgen/src/parser/token.c

Source (jump to first uncovered line)
#include "passgen/parser/token.h"

// Puts a token parser into its starting configuration: both position
// counters are rewound to zero and the state is set to PASSGEN_TOKEN_INIT.
void passgen_token_parser_init(struct passgen_token_parser *token_parser) {
    // rewind both position counters
    token_parser->byte_offset = 0;
    token_parser->offset = 0;

    // no token is in progress yet
    token_parser->state = PASSGEN_TOKEN_INIT;
}

// Maps a token parser state to a static, human-readable description.
//
// Returns NULL for any state that has no description in the table below.
// The returned string is a literal and must not be freed or modified.
const char *passgen_token_state_string(enum passgen_token_state state) {
    const char *description = NULL;

    switch(state) {
        case PASSGEN_TOKEN_INIT:
            description = "ready for parsing";
            break;
        case PASSGEN_TOKEN_ESCAPED:
            description = "parsing escaped token";
            break;
        case PASSGEN_TOKEN_UNICODE:
            description = "parsing unicode token";
            break;
        case PASSGEN_TOKEN_UNICODE_PAYLOAD:
            description = "parsing unicode token payload";
            break;
        // both "unexpected character" errors share the same message
        case PASSGEN_TOKEN_ERROR_UNICODE_START:
        case PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD:
            description = "unexpected character while parsing unicode literal";
            break;
        case PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN:
            description = "unicode literal payload too long";
            break;
        default:
            break;
    }

    return description;
}

// Decodes one hexadecimal digit given as a codepoint.
//
// Accepts '0'-'9', 'A'-'F' and 'a'-'f'. Returns the digit's value (0 to 15),
// or -1 if `c` is not a hexadecimal digit.
static inline int8_t hex_decode(uint32_t c) {
    if('0' <= c && c <= '9') {
        return (int8_t) (c - '0');
    }

    if('A' <= c && c <= 'F') {
        return (int8_t) (10 + c - 'A');
    }

    if('a' <= c && c <= 'f') {
        return (int8_t) (10 + c - 'a');
    }

    return -1;
}

// Handles a codepoint while the parser is in PASSGEN_TOKEN_INIT.
//
// The parser's current position is copied into the token first. A backslash
// then switches the parser to PASSGEN_TOKEN_ESCAPED without writing a
// codepoint; any other codepoint is stored in the token and the parser stays
// in PASSGEN_TOKEN_INIT.
static inline void token_parse_init(
    struct passgen_token_parser *parser,
    struct passgen_token *token,
    uint32_t codepoint) {
    // remember where this token begins
    token->byte_offset = parser->byte_offset;
    token->offset = parser->offset;

    // a backslash only opens an escape sequence
    if(codepoint == '\\') {
        parser->state = PASSGEN_TOKEN_ESCAPED;
        return;
    }

    // everything else is taken verbatim
    parser->state = PASSGEN_TOKEN_INIT;
    token->codepoint = codepoint;
}

// Lookup table for single-character ASCII escapes, indexed by the character
// that follows the backslash. Entries without an initializer are zero, which
// means "not a simple escape". The table only spans indices up to 'v', so
// callers must bounds-check against sizeof(simple_escaped) before indexing
// with an arbitrary (possibly non-ASCII) codepoint.
static const char simple_escaped[] = {
    ['\\'] = '\\',
    ['a'] = '\a',
    ['b'] = '\b',
    ['e'] = '\033',
    ['f'] = '\f',
    ['n'] = '\n',
    ['r'] = '\r',
    ['t'] = '\t',
    ['v'] = '\v',
};

// Handles the codepoint that directly follows a backslash.
//
// - If the codepoint has a non-zero entry in simple_escaped, the token
//   receives the mapped character and the parser returns to
//   PASSGEN_TOKEN_INIT.
// - 'u' switches the parser to PASSGEN_TOKEN_UNICODE; no token codepoint is
//   written.
// - Any other codepoint is stored in the token with PASSGEN_TOKEN_ESCAPED_BIT
//   OR-ed in, and the parser returns to PASSGEN_TOKEN_INIT.
static inline void token_parse_escaped(
    struct passgen_token_parser *parser,
    struct passgen_token *token,
    uint32_t codepoint) {
    // simple_escaped only covers ASCII, whereas codepoint could be much
    // larger, so check the bound before indexing. A zero entry means the
    // character has no simple escape.
    if(codepoint < sizeof(simple_escaped) && simple_escaped[codepoint]) {
        token->codepoint = simple_escaped[codepoint];
        parser->state = PASSGEN_TOKEN_INIT;

        return;
    }

    switch(codepoint) {
        case 'u':
            // start of a unicode literal, the opening brace comes next
            parser->state = PASSGEN_TOKEN_UNICODE;
            break;
        default:
            // unknown escape: keep the character but flag it as escaped
            token->codepoint = codepoint | PASSGEN_TOKEN_ESCAPED_BIT;
            parser->state = PASSGEN_TOKEN_INIT;
            break;
    }
}

// Handles the codepoint that follows a backslash-u sequence. Only an opening
// brace is accepted: it resets the payload accumulator and moves the parser
// to PASSGEN_TOKEN_UNICODE_PAYLOAD. Anything else puts the parser into
// PASSGEN_TOKEN_ERROR_UNICODE_START.
static inline void
token_parse_unicode(struct passgen_token_parser *parser, uint32_t codepoint) {
    if(codepoint != '{') {
        parser->state = PASSGEN_TOKEN_ERROR_UNICODE_START;
        return;
    }

    // start with an empty payload
    parser->data.unicode_payload.codepoint = 0;
    parser->data.unicode_payload.length = 0;
    parser->state = PASSGEN_TOKEN_UNICODE_PAYLOAD;
}

// Handles one codepoint inside the braces of a unicode literal.
//
// A closing brace stores the accumulated value in the token and returns the
// parser to PASSGEN_TOKEN_INIT. Otherwise the codepoint must be a hex digit
// and is appended to the accumulated value. More than six payload characters
// yield PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN; a non-hex character yields
// PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD. The length check runs before the
// digit check.
static inline void token_parse_unicode_payload(
    struct passgen_token_parser *parser,
    struct passgen_token *token,
    uint32_t codepoint) {
    // closing brace: the literal is complete, hand over the value
    if(codepoint == '}') {
        parser->state = PASSGEN_TOKEN_INIT;
        token->codepoint = parser->data.unicode_payload.codepoint;
        return;
    }

    // count this character and reject payloads longer than six
    parser->data.unicode_payload.length += 1;
    if(parser->data.unicode_payload.length > 6) {
        parser->state = PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN;
        return;
    }

    // anything that is not a hex digit is an error
    int8_t digit = hex_decode(codepoint);
    if(digit < 0) {
        parser->state = PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD;
        return;
    }

    // shift the accumulated value by one hex digit and append the new one
    parser->data.unicode_payload.codepoint =
        parser->data.unicode_payload.codepoint * 16 + digit;
}

// Feeds a single codepoint into the token parser.
//
// The codepoint is dispatched to the handler for the parser's current state.
// After a handler has run, the parser's offset advances by one and its
// byte_offset advances by `width`. If the parser is in any state without a
// handler, nothing is consumed and the offsets are left untouched.
//
// Returns the parser's state after the call.
int passgen_token_parse(
    struct passgen_token_parser *parser,
    struct passgen_token *token,
    uint8_t width,
    uint32_t codepoint) {
    if(parser->state == PASSGEN_TOKEN_INIT) {
        token_parse_init(parser, token, codepoint);
    } else if(parser->state == PASSGEN_TOKEN_ESCAPED) {
        token_parse_escaped(parser, token, codepoint);
    } else if(parser->state == PASSGEN_TOKEN_UNICODE) {
        token_parse_unicode(parser, codepoint);
    } else if(parser->state == PASSGEN_TOKEN_UNICODE_PAYLOAD) {
        token_parse_unicode_payload(parser, token, codepoint);
    } else {
        // no handler for this state: report it without consuming input
        return parser->state;
    }

    // the codepoint was consumed, so move both positions forward
    parser->byte_offset += width;
    parser->offset += 1;

    return parser->state;
}

// Returns the description of a parser state given as a plain int, such as
// the value passgen_token_parse() returns. The value is forwarded unchanged
// to passgen_token_state_string(), so the result is NULL whenever that
// function has no description for it.
const char *passgen_token_parse_error_str(int ret) {
    return passgen_token_state_string(ret);
}

Line	Count	Source (jump to first uncovered line)
1		#include "passgen/parser/token.h"
2
3	61	void passgen_token_parser_init(struct passgen_token_parser *token_parser) {
4	61	token_parser->state = PASSGEN_TOKEN_INIT;
5	61	token_parser->offset = 0;
6	61	token_parser->byte_offset = 0;
7	61	}
8
9	263	const char *passgen_token_state_string(enum passgen_token_state state) {
10	263	switch(state) {
11	1	case PASSGEN_TOKEN_INIT:
12	1	return "ready for parsing";
13	208	case PASSGEN_TOKEN_ESCAPED:
14	208	return "parsing escaped token";
15	4	case PASSGEN_TOKEN_UNICODE:
16	4	return "parsing unicode token";
17	3	case PASSGEN_TOKEN_UNICODE_PAYLOAD:
18	3	return "parsing unicode token payload";
19	41	case PASSGEN_TOKEN_ERROR_UNICODE_START:
20	41	return "unexpected character while parsing unicode literal";
21	1	case PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD:
22	1	return "unexpected character while parsing unicode literal";
23	3	case PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN:
24	3	return "unicode literal payload too long";
25	2	default:
26	2	return NULL;
27	263	}
28	263	}
29
30	5.56M	static inline int8_t hex_decode(uint32_t c) {
31	5.56M	if('0' <= c && c <= '9'5.56M ) {
32	3.50M	return c - '0';
33	3.50M	}
34	2.06M
35	2.06M	if('A' <= c && c <= 'F'2.06M ) {
36	5	return 10 + c - 'A';
37	5	}
38	2.06M
39	2.06M	if('a' <= c && c <= 'f'2.06M ) {
40	2.06M	return 10 + c - 'a';
41	2.06M	}
42	10
43	10	return -1;
44	10	}
45
46		static inline void token_parse_init(
47		struct passgen_token_parser *parser,
48		struct passgen_token *token,
49	3.68M	uint32_t codepoint) {
50	3.68M	// save position of initial token
51	3.68M	token->offset = parser->offset;
52	3.68M	token->byte_offset = parser->byte_offset;
53	3.68M
54	3.68M	if(codepoint == '\\') {
55	2.23M	parser->state = PASSGEN_TOKEN_ESCAPED;
56	2.23M	} else {
57	1.45M	token->codepoint = codepoint;
58	1.45M	parser->state = PASSGEN_TOKEN_INIT;
59	1.45M	}
60	3.68M	}
61
62		// Simple ASCII escape map. Don't use this for large (unicode) codepoints.
63		// Provides efficient O(1) lookup.
64		static const char simple_escaped[] = {
65		0,
66		['a'] = '\a',
67		['b'] = '\b',
68		['e'] = '\033',
69		['f'] = '\f',
70		['n'] = '\n',
71		['r'] = '\r',
72		['t'] = '\t',
73		['v'] = '\v',
74		['\\'] = '\\'};
75
76		static inline void token_parse_escaped(
77		struct passgen_token_parser *parser,
78		struct passgen_token *token,
79	2.23M	uint32_t codepoint) {
80	2.23M	// simple_escaped only covers ASCII, whereas codepoint could be much
81	2.23M	// larger.
82	2.23M	if(codepoint < sizeof(simple_escaped) && simple_escaped[codepoint]1.11M ) {
83	388	token->codepoint = simple_escaped[codepoint];
84	388	parser->state = PASSGEN_TOKEN_INIT;
85	388
86	388	return;
87	388	}
88	2.23M
89	2.23M	switch(codepoint) {
90	1.11M	case 'u':
91	1.11M	parser->state = PASSGEN_TOKEN_UNICODE;
92	1.11M	break;
93	1.11M	default:
94	1.11M	token->codepoint = codepoint \| PASSGEN_TOKEN_ESCAPED_BIT;
95	1.11M	parser->state = PASSGEN_TOKEN_INIT;
96	2.23M	}
97	2.23M	}
98
99		static inline void
100	1.11M	token_parse_unicode(struct passgen_token_parser *parser, uint32_t codepoint) {
101	1.11M	if(codepoint == '{') {
102	1.11M	parser->state = PASSGEN_TOKEN_UNICODE_PAYLOAD;
103	1.11M	parser->data.unicode_payload.length = 0;
104	1.11M	parser->data.unicode_payload.codepoint = 0;
105	1.11M	} else {
106	51	parser->state = PASSGEN_TOKEN_ERROR_UNICODE_START;
107	51	}
108	1.11M	}
109
110		static inline void token_parse_unicode_payload(
111		struct passgen_token_parser *parser,
112		struct passgen_token *token,
113	6.68M	uint32_t codepoint) {
114	6.68M	// once we read the closing brace, the payload is over and we can emit the
115	6.68M	// token.
116	6.68M	if(codepoint == '}') {
117	1.11M	token->codepoint = parser->data.unicode_payload.codepoint;
118	1.11M	parser->state = PASSGEN_TOKEN_INIT;
119	1.11M
120	1.11M	return;
121	1.11M	}
122	5.56M
123	5.56M	// keep track of length, make sure it's not too long.
124	5.56M	parser->data.unicode_payload.length++;
125	5.56M	if(parser->data.unicode_payload.length > 6) {
126	6	parser->state = PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN;
127	6
128	6	return;
129	6	}
130	5.56M
131	5.56M	// try to decode the hex value.
132	5.56M	int8_t decoded = hex_decode(codepoint);
133	5.56M	if(decoded < 0) {
134	10	parser->state = PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD;
135	10
136	10	return;
137	10	}
138	5.56M
139	5.56M	parser->data.unicode_payload.codepoint *= 16;
140	5.56M	parser->data.unicode_payload.codepoint += decoded;
141	5.56M	}
142
143		int passgen_token_parse(
144		struct passgen_token_parser *parser,
145		struct passgen_token *token,
146		uint8_t width,
147	13.7M	uint32_t codepoint) {
148	13.7M	switch(parser->state) {
149	3.68M	case PASSGEN_TOKEN_INIT:
150	3.68M	token_parse_init(parser, token, codepoint);
151	3.68M	break;
152	2.23M	case PASSGEN_TOKEN_ESCAPED:
153	2.23M	token_parse_escaped(parser, token, codepoint);
154	2.23M	break;
155	1.11M	case PASSGEN_TOKEN_UNICODE:
156	1.11M	token_parse_unicode(parser, codepoint);
157	1.11M	break;
158	6.68M	case PASSGEN_TOKEN_UNICODE_PAYLOAD:
159	6.68M	token_parse_unicode_payload(parser, token, codepoint);
160	6.68M	break;
161	3	default:
162	3	return parser->state;
163	13.7M	}
164	13.7M
165	13.7M	// update parser offsets
166	13.7M	parser->offset += 1;
167	13.7M	parser->byte_offset += width;
168	13.7M
169	13.7M	return parser->state;
170	13.7M	}
171
172	254	const char *passgen_token_parse_error_str(int ret) {
173	254	return passgen_token_state_string(ret);
174	254	}

Coverage Report

Created: 2024-05-03 06:05