/builds/xfbs/passgen/src/tests/token.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "passgen/parser/token.h" |
2 | | #include "tests.h" |
3 | | |
// Upper bound used by the exhaustive codepoint loops in this file. The loops
// iterate while `codepoint < CODEPOINT_MAX`, so 0x10FFFF itself is not fed to
// the parser.
enum { CODEPOINT_MAX = 0x10FFFF };
5 | | |
6 | | // Test that normal (non-escaped) tokens are parsed corredly. They should |
7 | | // just be fed through. |
8 | 1 | test_result test_token_normal(void) { |
9 | 1 | passgen_token_parser parser; |
10 | 1 | passgen_token token; |
11 | 1 | passgen_token_parser_init(&parser); |
12 | 1 | |
13 | 1.11M | for(uint32_t codepoint = 0; codepoint < CODEPOINT_MAX; codepoint++1.11M ) { |
14 | 1.11M | if(codepoint == '\\') { |
15 | 1 | continue; |
16 | 1 | } |
17 | 1.11M | int ret = passgen_token_parse(&parser, &token, 1, codepoint); |
18 | 1.11M | assert_eq(ret, PASSGEN_TOKEN_INIT); |
19 | 1.11M | assert_eq(parser.state, PASSGEN_TOKEN_INIT); |
20 | 1.11M | assert_eq(token.codepoint, codepoint); |
21 | 1.11M | } |
22 | 1 | |
23 | 1 | return test_ok; |
24 | 1 | } |
25 | | |
26 | | // Test that regular escapes are handled correctly: they should resolve |
27 | | // to the appropriate characters. |
28 | 1 | test_result test_token_escaped(void) { |
29 | 1 | passgen_token_parser parser; |
30 | 1 | passgen_token token; |
31 | 1 | passgen_token_parser_init(&parser); |
32 | 1 | |
33 | 1 | uint32_t escapes[][2] = { |
34 | 1 | {'\\', '\\'}, |
35 | 1 | {'a', '\a'}, |
36 | 1 | {'b', '\b'}, |
37 | 1 | {'f', '\f'}, |
38 | 1 | {'r', '\r'}, |
39 | 1 | {'n', '\n'}, |
40 | 1 | {'t', '\t'}, |
41 | 1 | {'v', '\v'}, |
42 | 1 | {'e', '\033'}, |
43 | 1 | {0, 0}}; |
44 | 1 | |
45 | 10 | for(size_t i = 0; escapes[i][0]; i++9 ) { |
46 | 9 | assert_eq( |
47 | 9 | passgen_token_parse(&parser, &token, 1, '\\'), |
48 | 9 | PASSGEN_TOKEN_ESCAPED); |
49 | 9 | assert_eq(parser.state, PASSGEN_TOKEN_ESCAPED); |
50 | 9 | assert_eq( |
51 | 9 | passgen_token_parse(&parser, &token, 1, escapes[i][0]), |
52 | 9 | PASSGEN_TOKEN_INIT); |
53 | 9 | assert_eq(parser.state, PASSGEN_TOKEN_INIT); |
54 | 9 | assert_eq(token.codepoint, escapes[i][1]); |
55 | 9 | } |
56 | 1 | |
57 | 1 | return test_ok; |
58 | 1 | } |
59 | | |
60 | | // Test that regular characters have the escape bit set when parsed with |
61 | | // leading backslashes. |
62 | 1 | test_result test_token_special_escaped(void) { |
63 | 1 | passgen_token_parser parser; |
64 | 1 | passgen_token token; |
65 | 1 | passgen_token_parser_init(&parser); |
66 | 1 | |
67 | 1.11M | for(uint32_t codepoint = 0; codepoint < CODEPOINT_MAX; codepoint++1.11M ) { |
68 | 1.11M | switch(codepoint) { |
69 | 10 | case '\\': |
70 | 10 | case 'a': |
71 | 10 | case 'b': |
72 | 10 | case 'f': |
73 | 10 | case 'r': |
74 | 10 | case 'n': |
75 | 10 | case 't': |
76 | 10 | case 'v': |
77 | 10 | case 'u': |
78 | 10 | case 'e': |
79 | 10 | continue; |
80 | 1.11M | default: |
81 | 1.11M | break; |
82 | 1.11M | } |
83 | 1.11M | int ret = passgen_token_parse(&parser, &token, 1, '\\'); |
84 | 1.11M | assert_eq(ret, PASSGEN_TOKEN_ESCAPED); |
85 | 1.11M | assert_eq(parser.state, PASSGEN_TOKEN_ESCAPED); |
86 | 1.11M | |
87 | 1.11M | ret = passgen_token_parse(&parser, &token, 1, codepoint); |
88 | 1.11M | assert_eq(ret, PASSGEN_TOKEN_INIT); |
89 | 1.11M | assert_eq(parser.state, PASSGEN_TOKEN_INIT); |
90 | 1.11M | assert_eq(token.codepoint, (codepoint | PASSGEN_TOKEN_ESCAPED_BIT)); |
91 | 1.11M | } |
92 | 1 | |
93 | 1 | return test_ok; |
94 | 1 | } |
95 | | |
96 | | // Test that escaped unicode tokens (eg: \u{ffff}) get parsed correctly. |
97 | 1 | test_result test_token_unicode(void) { |
98 | 1 | passgen_token_parser parser; |
99 | 1 | passgen_token token; |
100 | 1 | passgen_token_parser_init(&parser); |
101 | 1 | |
102 | 1 | char buffer[8]; |
103 | 1.11M | for(uint32_t codepoint = 0; codepoint < CODEPOINT_MAX; codepoint++1.11M ) { |
104 | 1.11M | sprintf(buffer, "%x", codepoint); |
105 | 1.11M | int ret = passgen_token_parse(&parser, &token, 1, '\\'); |
106 | 1.11M | assert_eq(ret, PASSGEN_TOKEN_ESCAPED); |
107 | 1.11M | |
108 | 1.11M | ret = passgen_token_parse(&parser, &token, 1, 'u'); |
109 | 1.11M | assert_eq(ret, PASSGEN_TOKEN_UNICODE); |
110 | 1.11M | |
111 | 1.11M | ret = passgen_token_parse(&parser, &token, 1, '{'); |
112 | 1.11M | assert_eq(ret, PASSGEN_TOKEN_UNICODE_PAYLOAD); |
113 | 1.11M | assert_eq(parser.data.unicode_payload.length, 0); |
114 | 1.11M | |
115 | 6.68M | for(size_t i = 0; buffer[i]; i++5.56M ) { |
116 | 5.56M | ret = passgen_token_parse(&parser, &token, 1, buffer[i]); |
117 | 5.56M | assert_eq(ret, PASSGEN_TOKEN_UNICODE_PAYLOAD); |
118 | 5.56M | assert_eq(parser.data.unicode_payload.length, i + 1); |
119 | 5.56M | } |
120 | 1.11M | |
121 | 1.11M | ret = passgen_token_parse(&parser, &token, 1, '}'); |
122 | 1.11M | assert_eq(ret, PASSGEN_TOKEN_INIT); |
123 | 1.11M | assert_eq(token.codepoint, codepoint); |
124 | 1.11M | } |
125 | 1 | |
126 | 1 | return test_ok; |
127 | 1 | } |
128 | | |
129 | | // Test that passing any character that is not an opening brace after \u |
130 | | // causes an error state (so \u{FC} is fine, but \u[ is not). |
131 | 1 | test_result test_token_unicode_error_start(void) { |
132 | 1 | passgen_token_parser parser; |
133 | 1 | passgen_token token; |
134 | 1 | passgen_token_parser_init(&parser); |
135 | 1 | |
136 | 1 | uint32_t chars[] = |
137 | 1 | {'a', 'b', 'c', 'x', 'y', '0', '9', '-', '_', '}', '[', 0}; |
138 | 1 | |
139 | 12 | for(size_t i = 0; chars[i]; i++11 ) { |
140 | 11 | passgen_token_parser_init(&parser); |
141 | 11 | assert_eq( |
142 | 11 | passgen_token_parse(&parser, &token, 1, '\\'), |
143 | 11 | PASSGEN_TOKEN_ESCAPED); |
144 | 11 | assert_eq( |
145 | 11 | passgen_token_parse(&parser, &token, 1, 'u'), |
146 | 11 | PASSGEN_TOKEN_UNICODE); |
147 | 11 | assert_eq( |
148 | 11 | passgen_token_parse(&parser, &token, 1, chars[i]), |
149 | 11 | PASSGEN_TOKEN_ERROR_UNICODE_START); |
150 | 11 | } |
151 | 1 | |
152 | 1 | return test_ok; |
153 | 1 | } |
154 | | |
155 | | // Test that passing any character that is not a hexadecimal character after \u{ |
156 | | // causes an error state (so \u{FC} is fine, but \u{ZZ} is not). |
157 | 1 | test_result test_token_unicode_error_payload(void) { |
158 | 1 | passgen_token_parser parser; |
159 | 1 | passgen_token token; |
160 | 1 | |
161 | 1 | uint32_t chars[] = {'x', ' ', '_', '-', '!', '+', '=', 'w', 'g', '[', 0}; |
162 | 1 | |
163 | 11 | for(size_t i = 0; chars[i]; i++10 ) { |
164 | 10 | passgen_token_parser_init(&parser); |
165 | 10 | assert_eq( |
166 | 10 | passgen_token_parse(&parser, &token, 1, '\\'), |
167 | 10 | PASSGEN_TOKEN_ESCAPED); |
168 | 10 | assert_eq( |
169 | 10 | passgen_token_parse(&parser, &token, 1, 'u'), |
170 | 10 | PASSGEN_TOKEN_UNICODE); |
171 | 10 | assert_eq( |
172 | 10 | passgen_token_parse(&parser, &token, 1, '{'), |
173 | 10 | PASSGEN_TOKEN_UNICODE_PAYLOAD); |
174 | 10 | assert_eq( |
175 | 10 | passgen_token_parse(&parser, &token, 1, chars[i]), |
176 | 10 | PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD); |
177 | 10 | } |
178 | 1 | |
179 | 1 | return test_ok; |
180 | 1 | } |
181 | | |
182 | | // Test that passing any character that is not an opening brace after \u |
183 | | // causes an error state (so \u{FC} is fine, but \u[ is not). |
184 | 1 | test_result test_token_unicode_error_len(void) { |
185 | 1 | passgen_token_parser parser; |
186 | 1 | passgen_token token; |
187 | 1 | |
188 | 1 | const uint32_t inputs[][7] = { |
189 | 1 | {'0', '0', '0', '0', '0', '0', '0'}, |
190 | 1 | {'1', '0', 'f', 'f', 'f', 'f', '0'}, |
191 | 1 | {'f', 'f', 'f', 'f', 'f', 'f', 'f'}, |
192 | 1 | {'0', '1', '2', '3', '4', '5', '6'}, |
193 | 1 | {0}}; |
194 | 1 | |
195 | 5 | for(size_t i = 0; inputs[i][0]; i++4 ) { |
196 | 4 | passgen_token_parser_init(&parser); |
197 | 4 | assert_eq( |
198 | 4 | passgen_token_parse(&parser, &token, 1, '\\'), |
199 | 4 | PASSGEN_TOKEN_ESCAPED); |
200 | 4 | assert_eq( |
201 | 4 | passgen_token_parse(&parser, &token, 1, 'u'), |
202 | 4 | PASSGEN_TOKEN_UNICODE); |
203 | 4 | assert_eq( |
204 | 4 | passgen_token_parse(&parser, &token, 1, '{'), |
205 | 4 | PASSGEN_TOKEN_UNICODE_PAYLOAD); |
206 | 4 | assert_eq(parser.data.unicode_payload.length, 0); |
207 | 28 | for(size_t c = 0; c < 6; c++24 ) { |
208 | 24 | assert_eq( |
209 | 24 | passgen_token_parse(&parser, &token, 1, inputs[i][c]), |
210 | 24 | PASSGEN_TOKEN_UNICODE_PAYLOAD); |
211 | 24 | assert_eq(parser.data.unicode_payload.length, c + 1); |
212 | 24 | } |
213 | 4 | assert_eq( |
214 | 4 | passgen_token_parse(&parser, &token, 1, inputs[i][6]), |
215 | 4 | PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN); |
216 | 4 | assert_eq(parser.data.unicode_payload.length, 7); |
217 | 4 | } |
218 | 1 | |
219 | 1 | return test_ok; |
220 | 1 | } |
221 | | |
222 | 1 | test_result test_token_state_string(void) { |
223 | 1 | // initial state |
224 | 1 | assert(passgen_token_state_string(PASSGEN_TOKEN_INIT) != NULL); |
225 | 1 | |
226 | 1 | // multi-charpoint states |
227 | 1 | assert(passgen_token_state_string(PASSGEN_TOKEN_ESCAPED) != NULL); |
228 | 1 | assert(passgen_token_state_string(PASSGEN_TOKEN_UNICODE) != NULL); |
229 | 1 | assert(passgen_token_state_string(PASSGEN_TOKEN_UNICODE_PAYLOAD) != NULL); |
230 | 1 | |
231 | 1 | // error states |
232 | 1 | assert( |
233 | 1 | passgen_token_state_string(PASSGEN_TOKEN_ERROR_UNICODE_START) != NULL); |
234 | 1 | assert( |
235 | 1 | passgen_token_state_string(PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD) != |
236 | 1 | NULL); |
237 | 1 | assert( |
238 | 1 | passgen_token_state_string(PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN) != |
239 | 1 | NULL); |
240 | 1 | |
241 | 1 | // undefined |
242 | 1 | assert( |
243 | 1 | passgen_token_state_string(PASSGEN_TOKEN_UNICODE_PAYLOAD + 1) == NULL); |
244 | 1 | assert( |
245 | 1 | passgen_token_state_string( |
246 | 1 | PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN - 1) == NULL); |
247 | 1 | |
248 | 1 | return test_ok; |
249 | 1 | } |
250 | | |
251 | | // Test that the token parser correctly keeps track of byte and codepoint |
252 | | // offsets. |
253 | 1 | test_result test_token_normal_offsets(void) { |
254 | 1 | passgen_token_parser parser = {0}; |
255 | 1 | passgen_token token = {0}; |
256 | 1 | |
257 | 1 | #define PARSE(cp, width) \ |
258 | 7 | assert( \ |
259 | 7 | passgen_token_parse(&parser, &token, width, cp) == \ |
260 | 7 | PASSGEN_TOKEN_INIT); \ |
261 | 7 | assert(parser.state == PASSGEN_TOKEN_INIT); \ |
262 | 7 | assert(token.codepoint == cp); |
263 | 1 | |
264 | 3 | PARSE1 ('a', 1); |
265 | 3 | assert1 (parser.offset == 1); |
266 | 1 | assert(parser.byte_offset == 1); |
267 | 1 | assert(token.offset == 0); |
268 | 1 | assert(token.byte_offset == 0); |
269 | 1 | |
270 | 3 | PARSE1 ('b', 1); |
271 | 3 | assert1 (parser.offset == 2); |
272 | 1 | assert(parser.byte_offset == 2); |
273 | 1 | assert(token.offset == 1); |
274 | 1 | assert(token.byte_offset == 1); |
275 | 1 | |
276 | 3 | PARSE1 ('c', 2); |
277 | 3 | assert1 (parser.offset == 3); |
278 | 1 | assert(parser.byte_offset == 4); |
279 | 1 | assert(token.offset == 2); |
280 | 1 | assert(token.byte_offset == 2); |
281 | 1 | |
282 | 3 | PARSE1 (' ', 2); |
283 | 3 | assert1 (parser.offset == 4); |
284 | 1 | assert(parser.byte_offset == 6); |
285 | 1 | assert(token.offset == 3); |
286 | 1 | assert(token.byte_offset == 4); |
287 | 1 | |
288 | 3 | PARSE1 ('!', 3); |
289 | 3 | assert1 (parser.offset == 5); |
290 | 1 | assert(parser.byte_offset == 9); |
291 | 1 | assert(token.offset == 4); |
292 | 1 | assert(token.byte_offset == 6); |
293 | 1 | |
294 | 3 | PARSE1 ('[', 3); |
295 | 3 | assert1 (parser.offset == 6); |
296 | 1 | assert(parser.byte_offset == 12); |
297 | 1 | assert(token.offset == 5); |
298 | 1 | assert(token.byte_offset == 9); |
299 | 1 | |
300 | 3 | PARSE1 (']', 4); |
301 | 3 | assert1 (parser.offset == 7); |
302 | 1 | assert(parser.byte_offset == 16); |
303 | 1 | assert(token.offset == 6); |
304 | 1 | assert(token.byte_offset == 12); |
305 | 1 | |
306 | 1 | #undef PARSE |
307 | 1 | |
308 | 1 | return test_ok; |
309 | 1 | } |
310 | | |
311 | | // Test that the token parser correctly keeps track of byte and codepoint |
312 | | // offsets. |
313 | 1 | test_result test_token_multi_offsets(void) { |
314 | 1 | passgen_token_parser parser = {0}; |
315 | 1 | passgen_token token = {0}; |
316 | 1 | |
317 | 1 | assert( |
318 | 1 | passgen_token_parse(&parser, &token, 1, '\\') == PASSGEN_TOKEN_ESCAPED); |
319 | 1 | assert(passgen_token_parse(&parser, &token, 1, '[') == PASSGEN_TOKEN_INIT); |
320 | 1 | assert(parser.offset == 2); |
321 | 1 | assert(parser.byte_offset == 2); |
322 | 1 | assert(token.offset == 0); |
323 | 1 | assert(token.byte_offset == 0); |
324 | 1 | |
325 | 1 | assert( |
326 | 1 | passgen_token_parse(&parser, &token, 1, '\\') == PASSGEN_TOKEN_ESCAPED); |
327 | 1 | assert(passgen_token_parse(&parser, &token, 1, ']') == PASSGEN_TOKEN_INIT); |
328 | 1 | assert(parser.offset == 4); |
329 | 1 | assert(parser.byte_offset == 4); |
330 | 1 | assert(token.offset == 2); |
331 | 1 | assert(token.byte_offset == 2); |
332 | 1 | |
333 | 1 | assert( |
334 | 1 | passgen_token_parse(&parser, &token, 1, '\\') == PASSGEN_TOKEN_ESCAPED); |
335 | 1 | assert( |
336 | 1 | passgen_token_parse(&parser, &token, 1, 'u') == PASSGEN_TOKEN_UNICODE); |
337 | 1 | assert( |
338 | 1 | passgen_token_parse(&parser, &token, 1, '{') == |
339 | 1 | PASSGEN_TOKEN_UNICODE_PAYLOAD); |
340 | 1 | assert( |
341 | 1 | passgen_token_parse(&parser, &token, 1, '0') == |
342 | 1 | PASSGEN_TOKEN_UNICODE_PAYLOAD); |
343 | 1 | assert( |
344 | 1 | passgen_token_parse(&parser, &token, 1, 'a') == |
345 | 1 | PASSGEN_TOKEN_UNICODE_PAYLOAD); |
346 | 1 | assert(passgen_token_parse(&parser, &token, 1, '}') == PASSGEN_TOKEN_INIT); |
347 | 1 | assert(parser.offset == 10); |
348 | 1 | assert(parser.byte_offset == 10); |
349 | 1 | assert(token.offset == 4); |
350 | 1 | assert(token.byte_offset == 4); |
351 | 1 | |
352 | 1 | return test_ok; |
353 | 1 | } |
354 | | |
355 | | // Test that parsing any character in an error state simply returns that error |
356 | | // state. |
357 | 1 | test_result test_token_error_propagation(void) { |
358 | 1 | passgen_token_parser parser; |
359 | 1 | passgen_token token; |
360 | 1 | int errors[] = { |
361 | 1 | PASSGEN_TOKEN_ERROR_UNICODE_START, |
362 | 1 | PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD, |
363 | 1 | PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN, |
364 | 1 | 0}; |
365 | 1 | |
366 | 4 | for(size_t i = 0; errors[i]; i++3 ) { |
367 | 3 | passgen_token_parser_init(&parser); |
368 | 3 | parser.state = errors[i]; |
369 | 3 | assert_eq(passgen_token_parse(&parser, &token, 1, 'a'), errors[i]); |
370 | 3 | } |
371 | 1 | |
372 | 1 | return test_ok; |
373 | 1 | } |