/builds/xfbs/passgen/src/tests/parser.c
Line | Count | Source |
1 | | #include "passgen/parser/parser.h" |
2 | | #include "passgen/container/stack.h" |
3 | | #include "passgen/parser/token.h" |
4 | | #include "passgen/pattern/group.h" |
5 | | #include "passgen/pattern/literal.h" |
6 | | #include "passgen/pattern/pattern.h" |
7 | | #include "passgen/pattern/range.h" |
8 | | #include "passgen/pattern/repeat.h" |
9 | | #include "passgen/pattern/segment.h" |
10 | | #include "passgen/pattern/segment_item.h" |
11 | | #include "passgen/pattern/set.h" |
12 | | #include "passgen/util/random.h" |
13 | | #include "passgen/util/utf8.h" |
14 | | #include "tests.h" |
15 | | #include <passgen/passgen.h> |
16 | | #include <stdlib.h> |
17 | | |
// Declares the fixtures every parser test works with: the parser, a token
// parser plus a scratch token (both zero-initialised), two cursor pointers
// into the parsed pattern, and the pattern object itself. The parser is then
// initialised with `parsed_pattern`. `segment` and `item` are cast to void
// because not every test reads them.
#define PREAMBLE()                                 \
    passgen_pattern parsed_pattern;                \
    passgen_parser parser;                         \
    passgen_token token = {0};                     \
    passgen_token_parser token_parser = {0};       \
    passgen_pattern_item *item;                    \
    passgen_pattern_segment *segment;              \
    passgen_parser_init(&parser, &parsed_pattern); \
    (void) segment;                                \
    (void) item
28 | | |
// Common test epilogue: asserts that finishing the parse succeeds (returns 0)
// and then releases the parser and the parsed pattern.
//
// Wrapped in do { ... } while (0) so that `POSTAMBLE();` is exactly one
// statement; the previous expansion carried its own trailing semicolon and
// consisted of three separate statements, which breaks under an unbraced
// `if`/`else`.
#define POSTAMBLE()                                  \
    do {                                             \
        assert_eq(0, passgen_parse_finish(&parser)); \
        passgen_parser_free(&parser);                \
        passgen_pattern_free(&parsed_pattern);       \
    } while(0)
33 | | |
// Feeds one codepoint through the tokenizer and hands the resulting token to
// the parser. Asserts that the tokenizer reports PASSGEN_TOKEN_INIT for this
// codepoint and that the parser accepts the token (returns 0).
//
// The two steps are wrapped in do { ... } while (0) so the macro behaves as a
// single statement, and the argument is parenthesised so any expression can
// be passed safely.
#define PARSE_CODEPOINT(codepoint)                                          \
    do {                                                                    \
        assert(                                                             \
            passgen_token_parse(&token_parser, &token, 1, (codepoint)) ==   \
            PASSGEN_TOKEN_INIT);                                            \
        assert_eq(0, passgen_parse_token(&parser, &token));                 \
    } while(0)
39 | | |
// Feeds a two-codepoint sequence (used by the tests for backslash escapes)
// through the tokenizer and passes the single resulting token to the parser.
// The first codepoint must leave the tokenizer in a state greater than zero,
// the second must bring it back to PASSGEN_TOKEN_INIT, and the parser must
// accept the token (return 0).
//
// Wrapped in do { ... } while (0) so the macro is one statement; arguments
// are parenthesised.
#define PARSE_CODEPOINT_DOUBLE(a, b)                                      \
    do {                                                                  \
        assert(passgen_token_parse(&token_parser, &token, 1, (a)) > 0);   \
        assert(                                                           \
            passgen_token_parse(&token_parser, &token, 1, (b)) ==         \
            PASSGEN_TOKEN_INIT);                                          \
        assert_eq(0, passgen_parse_token(&parser, &token));               \
    } while(0)
46 | | |
47 | 1 | test_result test_parser_empty(void) { |
48 | 1 | PREAMBLE(); |
49 | 1 | (void) item; |
50 | 1 | (void) token; |
51 | 1 | (void) token_parser; |
52 | 1 | |
53 | 1 | // single empty segment |
54 | 1 | assert(1 == parser.pattern->group.segments.len); |
55 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
56 | 1 | assert(0 == segment->items.len); |
57 | 1 | |
58 | 1 | POSTAMBLE(); |
59 | 1 | |
60 | 1 | return test_ok; |
61 | 1 | } |
62 | | |
63 | 1 | test_result test_parser_segment_multiplier(void) { |
64 | 1 | PREAMBLE(); |
65 | 1 | PARSE_CODEPOINT('('); |
66 | 1 | |
67 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
68 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
69 | 1 | |
70 | 1 | PARSE_CODEPOINT('{'); |
71 | 1 | PARSE_CODEPOINT('5'); |
72 | 1 | PARSE_CODEPOINT('}'); |
73 | 1 | |
74 | 1 | assert_eq(item->data.group.segments.len, 1); |
75 | 1 | assert_eq(item->data.group.multiplier_sum, 5); |
76 | 1 | segment = passgen_pattern_group_segment_get(&item->data.group, 0); |
77 | 1 | assert_eq(segment->multiplier, 5); |
78 | 1 | |
79 | 1 | PARSE_CODEPOINT('a'); |
80 | 1 | |
81 | 1 | assert_eq(segment->items.len, 1); |
82 | 1 | |
83 | 1 | PARSE_CODEPOINT('|'); |
84 | 1 | |
85 | 1 | assert_eq(item->data.group.segments.len, 2); |
86 | 1 | assert_eq(item->data.group.multiplier_sum, 6); |
87 | 1 | segment = passgen_pattern_group_segment_get(&item->data.group, 1); |
88 | 1 | assert_eq(segment->multiplier, 1); |
89 | 1 | |
90 | 1 | PARSE_CODEPOINT('b'); |
91 | 1 | PARSE_CODEPOINT('c'); |
92 | 1 | |
93 | 1 | // one item (two codepoints in a single literal) |
94 | 1 | assert_eq(segment->items.len, 1); |
95 | 1 | |
96 | 1 | PARSE_CODEPOINT('|'); |
97 | 1 | |
98 | 1 | assert_eq(item->data.group.segments.len, 3); |
99 | 1 | assert_eq(item->data.group.multiplier_sum, 7); |
100 | 1 | segment = passgen_pattern_group_segment_get(&item->data.group, 2); |
101 | 1 | assert_eq(segment->multiplier, 1); |
102 | 1 | |
103 | 1 | PARSE_CODEPOINT('{'); |
104 | 1 | PARSE_CODEPOINT('3'); |
105 | 1 | PARSE_CODEPOINT('}'); |
106 | 1 | |
107 | 1 | assert_eq(item->data.group.multiplier_sum, 9); |
108 | 1 | assert_eq(segment->multiplier, 3); |
109 | 1 | |
110 | 1 | PARSE_CODEPOINT('c'); |
111 | 1 | |
112 | 1 | assert_eq(segment->items.len, 1); |
113 | 1 | |
114 | 1 | PARSE_CODEPOINT(')'); |
115 | 1 | |
116 | 1 | assert_eq(item->data.group.multiplier_sum, 9); |
117 | 1 | |
118 | 1 | POSTAMBLE(); |
119 | 1 | |
120 | 1 | return test_ok; |
121 | 1 | } |
122 | | |
123 | | // when parsing a group, skip over any segments that have a zero multiplier. |
124 | 1 | test_result test_parser_skip_zero_segment(void) { |
125 | 1 | PREAMBLE(); |
126 | 1 | PARSE_CODEPOINT('('); |
127 | 1 | |
128 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
129 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
130 | 1 | assert_eq(item->data.group.segments.len, 1); |
131 | 1 | |
132 | 101 | for(size_t i = 0; i < 100; i++100 ) { |
133 | 100 | PARSE_CODEPOINT('{'); |
134 | 100 | PARSE_CODEPOINT('0'); |
135 | 100 | PARSE_CODEPOINT('}'); |
136 | 100 | PARSE_CODEPOINT('|'); |
137 | 100 | |
138 | 100 | // because the multiplier is zero, this segment is removed |
139 | 100 | assert_eq(item->data.group.segments.len, 1); |
140 | 100 | } |
141 | 1 | |
142 | 2 | PARSE_CODEPOINT1 ('{'); |
143 | 1 | PARSE_CODEPOINT('0'); |
144 | 1 | PARSE_CODEPOINT('}'); |
145 | 1 | PARSE_CODEPOINT(')'); |
146 | 1 | |
147 | 1 | // final segment is also removed, leaving no segments |
148 | 1 | assert_eq(item->data.group.segments.len, 0); |
149 | 1 | |
150 | 1 | POSTAMBLE(); |
151 | 1 | return test_ok; |
152 | 1 | } |
153 | | |
154 | 1 | test_result test_parser_empty_group(void) { |
155 | 1 | PREAMBLE(); |
156 | 1 | PARSE_CODEPOINT('('); |
157 | 1 | |
158 | 1 | assert(1 == parser.pattern->group.segments.len); |
159 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
160 | 1 | assert(segment); |
161 | 1 | assert(1 == segment->items.len); |
162 | 1 | |
163 | 1 | // group with one empty segment |
164 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
165 | 1 | assert(item); |
166 | 1 | assert(item->kind == PASSGEN_PATTERN_GROUP); |
167 | 1 | assert_eq(item->data.group.segments.len, 1); |
168 | 1 | |
169 | 1 | PARSE_CODEPOINT(')'); |
170 | 1 | |
171 | 1 | assert(1 == parser.pattern->group.segments.len); |
172 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
173 | 1 | assert(segment); |
174 | 1 | assert(1 == segment->items.len); |
175 | 1 | |
176 | 101 | for(size_t i = 0; i < 100; i++100 ) { |
177 | 100 | PARSE_CODEPOINT('('); |
178 | 100 | PARSE_CODEPOINT(')'); |
179 | 100 | |
180 | 100 | assert(1 == parser.pattern->group.segments.len); |
181 | 100 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
182 | 100 | assert(segment); |
183 | 100 | assert(1 == segment->items.len); |
184 | 100 | } |
185 | 1 | |
186 | 1 | POSTAMBLE(); |
187 | 1 | return test_ok; |
188 | 1 | } |
189 | | |
190 | 1 | test_result test_parser_single_char(void) { |
191 | 1 | PREAMBLE(); |
192 | 1 | PARSE_CODEPOINT('a'); |
193 | 1 | |
194 | 1 | // single segment containing char 'a' |
195 | 1 | assert(1 == parser.pattern->group.segments.len); |
196 | 1 | |
197 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
198 | 1 | assert(segment); |
199 | 1 | assert(1 == segment->items.len); |
200 | 1 | |
201 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
202 | 1 | assert(item); |
203 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
204 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
205 | 1 | assert(item->data.literal.count == 1); |
206 | 1 | |
207 | 1 | POSTAMBLE(); |
208 | 1 | |
209 | 1 | return test_ok; |
210 | 1 | } |
211 | | |
212 | 1 | test_result test_parser_multi_char(void) { |
213 | 1 | PREAMBLE(); |
214 | 1 | PARSE_CODEPOINT('a'); |
215 | 1 | PARSE_CODEPOINT('b'); |
216 | 1 | |
217 | 1 | // single segment containing char 'a' |
218 | 1 | assert(1 == parser.pattern->group.segments.len); |
219 | 1 | |
220 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
221 | 1 | assert(segment); |
222 | 1 | assert(1 == segment->items.len); |
223 | 1 | |
224 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
225 | 1 | assert(item); |
226 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
227 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
228 | 1 | assert(item->data.literal.codepoints[1] == 'b'); |
229 | 1 | assert(item->data.literal.count == 2); |
230 | 1 | |
231 | 1 | POSTAMBLE(); |
232 | 1 | |
233 | 1 | return test_ok; |
234 | 1 | } |
235 | | |
236 | 1 | test_result test_parser_multi_groups(void) { |
237 | 1 | PREAMBLE(); |
238 | 1 | PARSE_CODEPOINT('a'); |
239 | 1 | PARSE_CODEPOINT('|'); |
240 | 1 | PARSE_CODEPOINT('b'); |
241 | 1 | |
242 | 1 | assert(2 == parser.pattern->group.segments.len); |
243 | 1 | |
244 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
245 | 1 | assert(1 == segment->items.len); |
246 | 1 | |
247 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
248 | 1 | assert(item); |
249 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
250 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
251 | 1 | assert(item->data.literal.count == 1); |
252 | 1 | |
253 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 1); |
254 | 1 | assert(1 == segment->items.len); |
255 | 1 | |
256 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
257 | 1 | assert(item); |
258 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
259 | 1 | assert(item->data.literal.codepoints[0] == 'b'); |
260 | 1 | assert(item->data.literal.count == 1); |
261 | 1 | |
262 | 1 | POSTAMBLE(); |
263 | 1 | |
264 | 1 | return test_ok; |
265 | 1 | } |
266 | | |
267 | 1 | test_result test_parser_nested_groups(void) { |
268 | 1 | PREAMBLE(); |
269 | 1 | PARSE_CODEPOINT('('); |
270 | 1 | PARSE_CODEPOINT('a'); |
271 | 1 | PARSE_CODEPOINT(')'); |
272 | 1 | |
273 | 1 | assert(1 == parser.pattern->group.segments.len); |
274 | 1 | |
275 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
276 | 1 | assert(1 == segment->items.len); |
277 | 1 | |
278 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
279 | 1 | assert(item); |
280 | 1 | assert(item->kind == PASSGEN_PATTERN_GROUP); |
281 | 1 | |
282 | 1 | assert(1 == item->data.group.segments.len); |
283 | 1 | |
284 | 1 | segment = passgen_pattern_group_segment_get(&item->data.group, 0); |
285 | 1 | assert(1 == segment->items.len); |
286 | 1 | |
287 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
288 | 1 | assert(item); |
289 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
290 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
291 | 1 | assert(item->data.literal.count == 1); |
292 | 1 | assert(item->repeat.min == 1); |
293 | 1 | assert(item->repeat.max == 1); |
294 | 1 | |
295 | 1 | POSTAMBLE(); |
296 | 1 | |
297 | 1 | return test_ok; |
298 | 1 | } |
299 | | |
300 | 1 | test_result test_parser_depth_limit(void) { |
301 | 1 | PREAMBLE(); |
302 | 1 | parser.limit = 5; |
303 | 1 | PARSE_CODEPOINT('('); |
304 | 1 | PARSE_CODEPOINT('('); |
305 | 1 | PARSE_CODEPOINT('('); |
306 | 1 | PARSE_CODEPOINT('('); |
307 | 1 | assert(0 != passgen_parse_token(&parser, &token)); |
308 | 1 | |
309 | 1 | assert(0 != passgen_parse_finish(&parser)); |
310 | 1 | passgen_parser_free(&parser); |
311 | 1 | passgen_pattern_free(&parsed_pattern); |
312 | 1 | |
313 | 1 | return test_ok; |
314 | 1 | } |
315 | | |
316 | 1 | test_result test_parser_multi_nested_groups(void) { |
317 | 1 | PREAMBLE(); |
318 | 1 | PARSE_CODEPOINT('('); |
319 | 1 | PARSE_CODEPOINT('a'); |
320 | 1 | PARSE_CODEPOINT(')'); |
321 | 1 | PARSE_CODEPOINT('('); |
322 | 1 | PARSE_CODEPOINT('b'); |
323 | 1 | PARSE_CODEPOINT(')'); |
324 | 1 | |
325 | 1 | assert(1 == parser.pattern->group.segments.len); |
326 | 1 | |
327 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
328 | 1 | assert(2 == segment->items.len); |
329 | 1 | |
330 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
331 | 1 | assert(item); |
332 | 1 | assert(item->kind == PASSGEN_PATTERN_GROUP); |
333 | 1 | |
334 | 1 | assert(1 == item->data.group.segments.len); |
335 | 1 | |
336 | 1 | segment = passgen_pattern_group_segment_get(&item->data.group, 0); |
337 | 1 | assert(1 == segment->items.len); |
338 | 1 | |
339 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
340 | 1 | assert(item); |
341 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
342 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
343 | 1 | assert(item->data.literal.count == 1); |
344 | 1 | |
345 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
346 | 1 | item = passgen_pattern_segment_get_item(segment, 1); |
347 | 1 | assert(item); |
348 | 1 | assert(item->kind == PASSGEN_PATTERN_GROUP); |
349 | 1 | |
350 | 1 | assert(1 == item->data.group.segments.len); |
351 | 1 | |
352 | 1 | segment = passgen_pattern_group_segment_get(&item->data.group, 0); |
353 | 1 | assert(1 == segment->items.len); |
354 | 1 | |
355 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
356 | 1 | assert(item); |
357 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
358 | 1 | assert(item->data.literal.codepoints[0] == 'b'); |
359 | 1 | assert(item->data.literal.count == 1); |
360 | 1 | |
361 | 1 | POSTAMBLE(); |
362 | 1 | |
363 | 1 | return test_ok; |
364 | 1 | } |
365 | | |
366 | 1 | test_result test_parser_set_simple(void) { |
367 | 1 | PREAMBLE(); |
368 | 1 | PARSE_CODEPOINT('['); |
369 | 1 | PARSE_CODEPOINT('a'); |
370 | 1 | PARSE_CODEPOINT('b'); |
371 | 1 | PARSE_CODEPOINT(']'); |
372 | 1 | passgen_pattern_range *range; |
373 | 1 | |
374 | 1 | // single segment containing char 'a' |
375 | 1 | assert(1 == parser.pattern->group.segments.len); |
376 | 1 | |
377 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
378 | 1 | assert(segment); |
379 | 1 | assert(1 == segment->items.len); |
380 | 1 | |
381 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
382 | 1 | assert(item); |
383 | 1 | assert(item->kind == PASSGEN_PATTERN_SET); |
384 | 1 | |
385 | 1 | assert(item->data.set.items.len == 2); |
386 | 1 | |
387 | 1 | range = passgen_pattern_set_range_get(&item->data.set, 0); |
388 | 1 | assert(range); |
389 | 1 | assert(range->start == 'a'); |
390 | 1 | assert(range->end == 'a'); |
391 | 1 | |
392 | 1 | range = passgen_pattern_set_range_get(&item->data.set, 1); |
393 | 1 | assert(range); |
394 | 1 | assert(range->start == 'b'); |
395 | 1 | assert(range->end == 'b'); |
396 | 1 | |
397 | 1 | POSTAMBLE(); |
398 | 1 | |
399 | 1 | return test_ok; |
400 | 1 | } |
401 | | |
402 | 1 | test_result test_parser_set_simple_escaped(void) { |
403 | 1 | PREAMBLE(); |
404 | 1 | PARSE_CODEPOINT('['); |
405 | 1 | PARSE_CODEPOINT('a'); |
406 | 2 | PARSE_CODEPOINT_DOUBLE1 ('\\', '-'); |
407 | 1 | PARSE_CODEPOINT('b'); |
408 | 1 | PARSE_CODEPOINT(']'); |
409 | 1 | passgen_pattern_range *range; |
410 | 1 | |
411 | 1 | // single segment containing char 'a' |
412 | 1 | assert(1 == parser.pattern->group.segments.len); |
413 | 1 | |
414 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
415 | 1 | assert(segment); |
416 | 1 | assert(1 == segment->items.len); |
417 | 1 | |
418 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
419 | 1 | assert(item); |
420 | 1 | assert(item->kind == PASSGEN_PATTERN_SET); |
421 | 1 | |
422 | 1 | assert(item->data.set.items.len == 3); |
423 | 1 | |
424 | 1 | range = passgen_pattern_set_range_get(&item->data.set, 0); |
425 | 1 | assert(range); |
426 | 1 | assert(range->start == 'a'); |
427 | 1 | assert(range->end == 'a'); |
428 | 1 | |
429 | 1 | range = passgen_pattern_set_range_get(&item->data.set, 1); |
430 | 1 | assert(range); |
431 | 1 | assert(range->start == '-'); |
432 | 1 | assert(range->end == '-'); |
433 | 1 | |
434 | 1 | range = passgen_pattern_set_range_get(&item->data.set, 2); |
435 | 1 | assert(range); |
436 | 1 | assert(range->start == 'b'); |
437 | 1 | assert(range->end == 'b'); |
438 | 1 | |
439 | 1 | POSTAMBLE(); |
440 | 1 | |
441 | 1 | return test_ok; |
442 | 1 | } |
443 | | |
444 | 1 | test_result test_parser_range_simple(void) { |
445 | 1 | PREAMBLE(); |
446 | 1 | PARSE_CODEPOINT('['); |
447 | 1 | PARSE_CODEPOINT('a'); |
448 | 1 | PARSE_CODEPOINT('-'); |
449 | 1 | PARSE_CODEPOINT('b'); |
450 | 1 | PARSE_CODEPOINT(']'); |
451 | 1 | passgen_pattern_range *range; |
452 | 1 | |
453 | 1 | // single segment containing char 'a' |
454 | 1 | assert(1 == parser.pattern->group.segments.len); |
455 | 1 | |
456 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
457 | 1 | assert(segment); |
458 | 1 | assert(1 == segment->items.len); |
459 | 1 | |
460 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
461 | 1 | assert(item); |
462 | 1 | assert(item->kind == PASSGEN_PATTERN_SET); |
463 | 1 | |
464 | 1 | assert(item->data.set.items.len == 1); |
465 | 1 | |
466 | 1 | range = passgen_pattern_set_range_get(&item->data.set, 0); |
467 | 1 | assert(range); |
468 | 1 | assert(range->start == 'a'); |
469 | 1 | assert(range->end == 'b'); |
470 | 1 | |
471 | 1 | POSTAMBLE(); |
472 | 1 | |
473 | 1 | return test_ok; |
474 | 1 | } |
475 | | |
476 | 1 | test_result test_parser_range_multiple(void) { |
477 | 1 | PREAMBLE(); |
478 | 1 | PARSE_CODEPOINT('['); |
479 | 1 | PARSE_CODEPOINT('a'); |
480 | 1 | PARSE_CODEPOINT('-'); |
481 | 1 | PARSE_CODEPOINT('b'); |
482 | 1 | PARSE_CODEPOINT('c'); |
483 | 1 | PARSE_CODEPOINT('-'); |
484 | 1 | PARSE_CODEPOINT('d'); |
485 | 1 | PARSE_CODEPOINT(']'); |
486 | 1 | passgen_pattern_range *range; |
487 | 1 | |
488 | 1 | // single segment containing char 'a' |
489 | 1 | assert(1 == parser.pattern->group.segments.len); |
490 | 1 | |
491 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
492 | 1 | assert(segment); |
493 | 1 | assert(1 == segment->items.len); |
494 | 1 | |
495 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
496 | 1 | assert(item); |
497 | 1 | assert(item->kind == PASSGEN_PATTERN_SET); |
498 | 1 | |
499 | 1 | assert(item->data.set.items.len == 2); |
500 | 1 | |
501 | 1 | range = passgen_pattern_set_range_get(&item->data.set, 0); |
502 | 1 | assert(range); |
503 | 1 | assert(range->start == 'a'); |
504 | 1 | assert(range->end == 'b'); |
505 | 1 | |
506 | 1 | range = passgen_pattern_set_range_get(&item->data.set, 1); |
507 | 1 | assert(range); |
508 | 1 | assert(range->start == 'c'); |
509 | 1 | assert(range->end == 'd'); |
510 | 1 | |
511 | 1 | POSTAMBLE(); |
512 | 1 | |
513 | 1 | return test_ok; |
514 | 1 | } |
515 | | |
516 | 1 | test_result test_parser_char_repeat(void) { |
517 | 1 | PREAMBLE(); |
518 | 1 | PARSE_CODEPOINT('a'); |
519 | 1 | PARSE_CODEPOINT('{'); |
520 | 1 | PARSE_CODEPOINT('2'); |
521 | 1 | PARSE_CODEPOINT('}'); |
522 | 1 | |
523 | 1 | // single segment containing char 'a' |
524 | 1 | assert(1 == parser.pattern->group.segments.len); |
525 | 1 | |
526 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
527 | 1 | assert(segment); |
528 | 1 | assert(1 == segment->items.len); |
529 | 1 | |
530 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
531 | 1 | assert(item); |
532 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
533 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
534 | 1 | assert(item->data.literal.count == 1); |
535 | 1 | assert(item->repeat.min == 2); |
536 | 1 | assert(item->repeat.max == 2); |
537 | 1 | |
538 | 1 | passgen_parser_free(&parser); |
539 | 1 | passgen_pattern_free(&parsed_pattern); |
540 | 1 | |
541 | 1 | return test_ok; |
542 | 1 | } |
543 | | |
544 | 1 | test_result test_parser_char_repeat_range(void) { |
545 | 1 | PREAMBLE(); |
546 | 1 | PARSE_CODEPOINT('a'); |
547 | 1 | PARSE_CODEPOINT('{'); |
548 | 1 | PARSE_CODEPOINT('2'); |
549 | 1 | PARSE_CODEPOINT(','); |
550 | 1 | PARSE_CODEPOINT('4'); |
551 | 1 | PARSE_CODEPOINT('}'); |
552 | 1 | |
553 | 1 | // single segment containing char 'a' |
554 | 1 | assert(1 == parser.pattern->group.segments.len); |
555 | 1 | |
556 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
557 | 1 | assert(segment); |
558 | 1 | assert(1 == segment->items.len); |
559 | 1 | |
560 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
561 | 1 | assert(item); |
562 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
563 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
564 | 1 | assert(item->data.literal.count == 1); |
565 | 1 | assert(item->repeat.min == 2); |
566 | 1 | assert(item->repeat.max == 4); |
567 | 1 | |
568 | 1 | POSTAMBLE(); |
569 | 1 | |
570 | 1 | return test_ok; |
571 | 1 | } |
572 | | |
573 | 1 | test_result test_parser_group_ignore_escaped(void) { |
574 | 1 | PREAMBLE(); |
575 | 2 | PARSE_CODEPOINT_DOUBLE1 ('\\', '('); |
576 | 2 | PARSE_CODEPOINT_DOUBLE1 ('\\', '{'); |
577 | 2 | PARSE_CODEPOINT_DOUBLE1 ('\\', '['); |
578 | 2 | PARSE_CODEPOINT_DOUBLE1 ('\\', '|'); |
579 | 1 | |
580 | 1 | assert(1 == parser.pattern->group.segments.len); |
581 | 1 | |
582 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
583 | 1 | assert(segment); |
584 | 1 | assert(1 == segment->items.len); |
585 | 1 | |
586 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
587 | 1 | assert(item); |
588 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
589 | 1 | assert(item->data.literal.codepoints[0] == '('); |
590 | 1 | assert(item->data.literal.codepoints[1] == '{'); |
591 | 1 | assert(item->data.literal.codepoints[2] == '['); |
592 | 1 | assert(item->data.literal.codepoints[3] == '|'); |
593 | 1 | assert(item->data.literal.count == 4); |
594 | 1 | assert(item->repeat.min == 1); |
595 | 1 | assert(item->repeat.max == 1); |
596 | 1 | |
597 | 1 | POSTAMBLE(); |
598 | 1 | |
599 | 1 | return test_ok; |
600 | 1 | } |
601 | | |
602 | 1 | test_result test_parser_item_maybe(void) { |
603 | 1 | PREAMBLE(); |
604 | 1 | PARSE_CODEPOINT('a'); |
605 | 1 | PARSE_CODEPOINT('a'); |
606 | 1 | PARSE_CODEPOINT('?'); |
607 | 1 | PARSE_CODEPOINT('('); |
608 | 1 | PARSE_CODEPOINT('a'); |
609 | 1 | PARSE_CODEPOINT(')'); |
610 | 1 | PARSE_CODEPOINT('('); |
611 | 1 | PARSE_CODEPOINT('b'); |
612 | 1 | PARSE_CODEPOINT(')'); |
613 | 1 | PARSE_CODEPOINT('?'); |
614 | 1 | |
615 | 1 | // single segment containing char 'a' |
616 | 1 | assert(1 == parser.pattern->group.segments.len); |
617 | 1 | |
618 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
619 | 1 | assert(segment); |
620 | 1 | assert(4 == segment->items.len); |
621 | 1 | |
622 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
623 | 1 | assert(item); |
624 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
625 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
626 | 1 | assert(item->data.literal.count == 1); |
627 | 1 | assert(item->repeat.min == 1); |
628 | 1 | assert(item->repeat.max == 1); |
629 | 1 | assert(item->maybe == false); |
630 | 1 | |
631 | 1 | item = passgen_pattern_segment_get_item(segment, 1); |
632 | 1 | assert(item); |
633 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
634 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
635 | 1 | assert(item->data.literal.count == 1); |
636 | 1 | assert(item->repeat.min == 1); |
637 | 1 | assert(item->repeat.max == 1); |
638 | 1 | assert(item->maybe == true); |
639 | 1 | |
640 | 1 | item = passgen_pattern_segment_get_item(segment, 2); |
641 | 1 | assert(item); |
642 | 1 | assert(item->kind == PASSGEN_PATTERN_GROUP); |
643 | 1 | assert(item->repeat.min == 1); |
644 | 1 | assert(item->repeat.max == 1); |
645 | 1 | assert(item->maybe == false); |
646 | 1 | |
647 | 1 | item = passgen_pattern_segment_get_item(segment, 3); |
648 | 1 | assert(item); |
649 | 1 | assert(item->kind == PASSGEN_PATTERN_GROUP); |
650 | 1 | assert(item->repeat.min == 1); |
651 | 1 | assert(item->repeat.max == 1); |
652 | 1 | assert(item->maybe == true); |
653 | 1 | |
654 | 1 | POSTAMBLE(); |
655 | 1 | |
656 | 1 | return test_ok; |
657 | 1 | } |
658 | | |
659 | 1 | test_result test_parser_special_pronounceable(void) { |
660 | 1 | PREAMBLE(); |
661 | 2 | PARSE_CODEPOINT_DOUBLE1 ('\\', 'm'); |
662 | 1 | PARSE_CODEPOINT('{'); |
663 | 1 | PARSE_CODEPOINT('e'); |
664 | 1 | PARSE_CODEPOINT('n'); |
665 | 1 | PARSE_CODEPOINT('g'); |
666 | 1 | PARSE_CODEPOINT('l'); |
667 | 1 | PARSE_CODEPOINT('i'); |
668 | 1 | PARSE_CODEPOINT('s'); |
669 | 1 | PARSE_CODEPOINT('h'); |
670 | 1 | PARSE_CODEPOINT('}'); |
671 | 1 | |
672 | 1 | assert(parser.state.len == 1); |
673 | 1 | |
674 | 1 | assert(1 == parser.pattern->group.segments.len); |
675 | 1 | |
676 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
677 | 1 | assert(segment); |
678 | 1 | assert(1 == segment->items.len); |
679 | 1 | |
680 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
681 | 1 | assert(item); |
682 | 1 | assert(item->kind == PASSGEN_PATTERN_SPECIAL); |
683 | 1 | assert(item->repeat.min == 1); |
684 | 1 | assert(item->repeat.max == 1); |
685 | 1 | assert(item->maybe == false); |
686 | 1 | assert(item->data.special.kind == PASSGEN_PATTERN_SPECIAL_MARKOV); |
687 | 1 | |
688 | 1 | POSTAMBLE(); |
689 | 1 | |
690 | 1 | return test_ok; |
691 | 1 | } |
692 | | |
693 | 1 | test_result test_parser_mixed_special(void) { |
694 | 1 | PREAMBLE(); |
695 | 1 | PARSE_CODEPOINT('a'); |
696 | 2 | PARSE_CODEPOINT_DOUBLE1 ('\\', 'm'); |
697 | 1 | PARSE_CODEPOINT('{'); |
698 | 1 | PARSE_CODEPOINT('e'); |
699 | 1 | PARSE_CODEPOINT('n'); |
700 | 1 | PARSE_CODEPOINT('g'); |
701 | 1 | PARSE_CODEPOINT('l'); |
702 | 1 | PARSE_CODEPOINT('i'); |
703 | 1 | PARSE_CODEPOINT('s'); |
704 | 1 | PARSE_CODEPOINT('h'); |
705 | 1 | PARSE_CODEPOINT('}'); |
706 | 1 | |
707 | 1 | assert(parser.state.len == 1); |
708 | 1 | |
709 | 1 | assert(1 == parser.pattern->group.segments.len); |
710 | 1 | |
711 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
712 | 1 | assert(segment); |
713 | 1 | assert(2 == segment->items.len); |
714 | 1 | |
715 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
716 | 1 | assert(item); |
717 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
718 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
719 | 1 | assert(item->data.literal.count == 1); |
720 | 1 | assert(item->repeat.min == 1); |
721 | 1 | assert(item->repeat.max == 1); |
722 | 1 | assert(item->maybe == false); |
723 | 1 | |
724 | 1 | item = passgen_pattern_segment_get_item(segment, 1); |
725 | 1 | assert(item); |
726 | 1 | assert(item->kind == PASSGEN_PATTERN_SPECIAL); |
727 | 1 | assert(item->repeat.min == 1); |
728 | 1 | assert(item->repeat.max == 1); |
729 | 1 | assert(item->maybe == false); |
730 | 1 | assert(item->data.special.kind == PASSGEN_PATTERN_SPECIAL_MARKOV); |
731 | 1 | |
732 | 1 | POSTAMBLE(); |
733 | 1 | |
734 | 1 | return test_ok; |
735 | 1 | } |
736 | | |
737 | 1 | test_result test_parser_char_maybe_char(void) { |
738 | 1 | PREAMBLE(); |
739 | 1 | PARSE_CODEPOINT('a'); |
740 | 1 | PARSE_CODEPOINT('b'); |
741 | 1 | PARSE_CODEPOINT('c'); |
742 | 1 | PARSE_CODEPOINT('?'); |
743 | 1 | PARSE_CODEPOINT('d'); |
744 | 1 | |
745 | 1 | assert(parser.state.len == 1); |
746 | 1 | |
747 | 1 | assert(1 == parser.pattern->group.segments.len); |
748 | 1 | |
749 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
750 | 1 | assert(segment); |
751 | 1 | assert(3 == segment->items.len); |
752 | 1 | |
753 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
754 | 1 | assert(item); |
755 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
756 | 1 | assert(item->data.literal.count == 2); |
757 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
758 | 1 | assert(item->data.literal.codepoints[1] == 'b'); |
759 | 1 | assert(item->data.literal.tainted == false); |
760 | 1 | assert(item->repeat.min == 1); |
761 | 1 | assert(item->repeat.max == 1); |
762 | 1 | assert(item->maybe == false); |
763 | 1 | |
764 | 1 | item = passgen_pattern_segment_get_item(segment, 1); |
765 | 1 | assert(item); |
766 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
767 | 1 | assert(item->data.literal.count == 1); |
768 | 1 | assert(item->data.literal.codepoints[0] == 'c'); |
769 | 1 | assert(item->data.literal.tainted == true); |
770 | 1 | assert(item->repeat.min == 1); |
771 | 1 | assert(item->repeat.max == 1); |
772 | 1 | assert(item->maybe == true); |
773 | 1 | |
774 | 1 | item = passgen_pattern_segment_get_item(segment, 2); |
775 | 1 | assert(item); |
776 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
777 | 1 | assert(item->data.literal.count == 1); |
778 | 1 | assert(item->data.literal.codepoints[0] == 'd'); |
779 | 1 | assert(item->data.literal.tainted == false); |
780 | 1 | assert(item->repeat.min == 1); |
781 | 1 | assert(item->repeat.max == 1); |
782 | 1 | assert(item->maybe == false); |
783 | 1 | |
784 | 1 | POSTAMBLE(); |
785 | 1 | |
786 | 1 | return test_ok; |
787 | 1 | } |
788 | | |
789 | 1 | test_result test_parser_char_repeat_tainted(void) { |
790 | 1 | PREAMBLE(); |
791 | 1 | PARSE_CODEPOINT('a'); |
792 | 1 | PARSE_CODEPOINT('{'); |
793 | 1 | PARSE_CODEPOINT('3'); |
794 | 1 | PARSE_CODEPOINT('}'); |
795 | 1 | PARSE_CODEPOINT('b'); |
796 | 1 | PARSE_CODEPOINT('c'); |
797 | 1 | PARSE_CODEPOINT('d'); |
798 | 1 | PARSE_CODEPOINT('e'); |
799 | 1 | PARSE_CODEPOINT('f'); |
800 | 1 | PARSE_CODEPOINT('g'); |
801 | 1 | PARSE_CODEPOINT('h'); |
802 | 1 | PARSE_CODEPOINT('i'); |
803 | 1 | PARSE_CODEPOINT('j'); |
804 | 1 | PARSE_CODEPOINT('k'); |
805 | 1 | PARSE_CODEPOINT('{'); |
806 | 1 | PARSE_CODEPOINT('2'); |
807 | 1 | PARSE_CODEPOINT('}'); |
808 | 1 | PARSE_CODEPOINT('?'); |
809 | 1 | PARSE_CODEPOINT('a'); |
810 | 1 | PARSE_CODEPOINT('b'); |
811 | 1 | |
812 | 1 | assert(parser.state.len == 1); |
813 | 1 | |
814 | 1 | assert(1 == parser.pattern->group.segments.len); |
815 | 1 | |
816 | 1 | segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0); |
817 | 1 | assert(segment); |
818 | 1 | assert(5 == segment->items.len); |
819 | 1 | |
820 | 1 | item = passgen_pattern_segment_get_item(segment, 0); |
821 | 1 | assert(item); |
822 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
823 | 1 | assert(item->data.literal.count == 1); |
824 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
825 | 1 | assert(item->data.literal.tainted == true); |
826 | 1 | assert(item->repeat.min == 3); |
827 | 1 | assert(item->repeat.max == 3); |
828 | 1 | assert(item->maybe == false); |
829 | 1 | |
830 | 1 | item = passgen_pattern_segment_get_item(segment, 1); |
831 | 1 | assert(item); |
832 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
833 | 1 | assert(item->data.literal.count == 7); |
834 | 1 | assert(item->data.literal.codepoints[0] == 'b'); |
835 | 1 | assert(item->data.literal.codepoints[1] == 'c'); |
836 | 1 | assert(item->data.literal.codepoints[2] == 'd'); |
837 | 1 | assert(item->data.literal.codepoints[3] == 'e'); |
838 | 1 | assert(item->data.literal.codepoints[4] == 'f'); |
839 | 1 | assert(item->data.literal.codepoints[5] == 'g'); |
840 | 1 | assert(item->data.literal.codepoints[6] == 'h'); |
841 | 1 | assert(item->data.literal.tainted == false); |
842 | 1 | assert(item->repeat.min == 1); |
843 | 1 | assert(item->repeat.max == 1); |
844 | 1 | assert(item->maybe == false); |
845 | 1 | |
846 | 1 | item = passgen_pattern_segment_get_item(segment, 2); |
847 | 1 | assert(item); |
848 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
849 | 1 | assert(item->data.literal.count == 2); |
850 | 1 | assert(item->data.literal.codepoints[0] == 'i'); |
851 | 1 | assert(item->data.literal.codepoints[1] == 'j'); |
852 | 1 | assert(item->data.literal.tainted == false); |
853 | 1 | assert(item->repeat.min == 1); |
854 | 1 | assert(item->repeat.max == 1); |
855 | 1 | assert(item->maybe == false); |
856 | 1 | |
857 | 1 | item = passgen_pattern_segment_get_item(segment, 3); |
858 | 1 | assert(item); |
859 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
860 | 1 | assert(item->data.literal.count == 1); |
861 | 1 | assert(item->data.literal.codepoints[0] == 'k'); |
862 | 1 | assert(item->data.literal.tainted == true); |
863 | 1 | assert(item->repeat.min == 2); |
864 | 1 | assert(item->repeat.max == 2); |
865 | 1 | assert(item->maybe == true); |
866 | 1 | |
867 | 1 | item = passgen_pattern_segment_get_item(segment, 4); |
868 | 1 | assert(item); |
869 | 1 | assert(item->kind == PASSGEN_PATTERN_LITERAL); |
870 | 1 | assert(item->data.literal.count == 2); |
871 | 1 | assert(item->data.literal.codepoints[0] == 'a'); |
872 | 1 | assert(item->data.literal.codepoints[1] == 'b'); |
873 | 1 | assert(item->data.literal.tainted == false); |
874 | 1 | assert(item->repeat.min == 1); |
875 | 1 | assert(item->repeat.max == 1); |
876 | 1 | assert(item->maybe == false); |
877 | 1 | |
878 | 1 | POSTAMBLE(); |
879 | 1 | |
880 | 1 | return test_ok; |
881 | 1 | } |
882 | | |
883 | | #undef PREAMBLE |
884 | | #undef POSTAMBLE |
885 | | #undef PARSE_CODEPOINT |
886 | | #undef PARSE_CODEPOINT_DOUBLE |
887 | | |
888 | | /// Patterns that are known to be broken (result in a parse error). |
889 | | const char *pattern_broken[] = { |
890 | | // closing groups that don't exist |
891 | | ")", |
892 | | ")))", |
893 | | "[a-z]))", |
894 | | // character ranges whose end is lower than their start |
895 | | "[a-0]", |
896 | | "[z-a]", |
897 | | // character ranges with a missing end |
898 | | "[a-", |
899 | | "[b-", |
900 | | "[b-]", |
901 | | "[za-", |
902 | | // unclosed groups |
903 | | "(", |
904 | | "(()", |
905 | | // unfinished escape sequence |
906 | | "\\", |
907 | | "\\u", |
908 | | "\\u{", |
909 | | "\\u{0a", |
910 | | // unicode literal payload too long |
911 | | "\\u{0000000000000", |
912 | | "\\u{123456789abcdef", |
913 | | // invalid utf8 sequences, taken from: |
914 | | // https://stackoverflow.com/questions/1301402/example-invalid-utf8-string |
915 | | // invalid 2-octet utf8 |
916 | | "\xc3\x28", |
917 | | // invalid 3-octet utf8 (in second octet) |
918 | | "\xe2\x28\xa1", |
919 | | // invalid 3-octet utf8 (in third octet) |
920 | | "\xe2\x82\x28", |
921 | | // Invalid 4 Octet Sequence (in 2nd Octet) |
922 | | "\xf0\x28\x8c\xbc", |
923 | | // Invalid 4 Octet Sequence (in 3rd Octet) |
924 | | "\xf0\x90\x28\xbc", |
925 | | // Invalid 4 Octet Sequence (in 4th Octet; the 2nd octet is invalid as well) |
926 | | "\xf0\x28\x8c\x28", |
927 | | // Valid 5 Octet Sequence (but not Unicode!) |
928 | | "\xf8\xa1\xa1\xa1\xa1", |
929 | | // Valid 6 Octet Sequence (but not Unicode!) |
930 | | "\xfc\xa1\xa1\xa1\xa1\xa1", |
931 | | NULL, |
932 | | }; |
933 | | |
934 | | /// Make sure that the parser returns an error when parsing these known broken |
935 | | /// patterns. |
936 | 1 | test_result test_parser_parse_broken(void) { |
937 | 26 | for(int i = 0; pattern_broken[i]; i++25 ) { |
938 | 25 | passgen_pattern pattern; |
939 | 25 | passgen_error error; |
940 | 25 | int ret = passgen_parse(&pattern, &error, pattern_broken[i]); |
941 | 25 | passgen_pattern_free(&pattern); |
942 | 25 | assert(ret != 0); |
943 | 25 | } |
944 | 1 | |
945 | 1 | return test_ok; |
946 | 1 | } |
947 | | |
948 | | /// Patterns that are known to be working. |
949 | | const char *pattern_working[] = { |
950 | | // character literal |
951 | | "", |
952 | | "a", |
953 | | "ab", |
954 | | "abc", |
955 | | "abcd", |
956 | | "abcde", |
957 | | "abcdef", |
958 | | "abcdefg", |
959 | | "abcdefgh", |
960 | | "abcdefghi", |
961 | | "abcdefghij", |
962 | | // character literal repetition |
963 | | "a{1}", |
964 | | "a{9}", |
965 | | "a{12}", |
966 | | "a{12,16}", |
967 | | // group |
968 | | "()", |
969 | | "(a)", |
970 | | "(a|b)", |
971 | | "(a|b|c)", |
972 | | "(a{2}|b|c)", |
973 | | "(a|b|c){2}", |
974 | | // character range |
975 | | "[a]", |
976 | | "[abc]", |
977 | | "[a-z]", |
978 | | "[a-z0-9]", |
979 | | "[a-z0-9!@#$%^&*]", |
980 | | "[a-z]{2}", |
981 | | "[a-z]{2,8}", |
982 | | // unicode literal |
983 | | "\\u{0a}", |
984 | | "\\u{0A}", |
985 | | "\\u{fc}", |
986 | | "\\u{FC}", |
987 | | "\\u{00fc}", |
988 | | "\\u{00FC}", |
989 | | "\\u{10ffff}", |
990 | | // special wordlist |
991 | | "\\w{english}", |
992 | | "\\m{english}", |
993 | | "\\p{pattern}", |
994 | | "\\w{englishenglish}", |
995 | | "\\w{englishenglishenglishenglish}", |
996 | | "\\w{englishenglishenglishenglishenglishenglishenglish}", |
997 | | "\\w{veryveryveryveryveryveryveryveryveryveryveryveryveryveryvery}", |
998 | | // very long pattern |
999 | | "abababababababababababababababababababababababababababababababababababab" |
1000 | | "abababababababababababababababababababababababababababababababababababab" |
1001 | | "abababababababababababababababababababababababababababababababababababab" |
1002 | | "abababababababababababababababababababababababababababababababababababab", |
1003 | | NULL, |
1004 | | }; |
1005 | | |
1006 | | /// Make sure that we can parse patterns that are known to be good. |
1007 | 1 | test_result test_parser_parse_working(void) { |
1008 | 44 | for(int i = 0; pattern_working[i]; i++43 ) { |
1009 | 43 | passgen_pattern pattern; |
1010 | 43 | passgen_error error; |
1011 | 43 | int ret = passgen_parse(&pattern, &error, pattern_working[i]); |
1012 | 43 | assert(ret == 0); |
1013 | 43 | passgen_pattern_free(&pattern); |
1014 | 43 | } |
1015 | 1 | |
1016 | 1 | return test_ok; |
1017 | 1 | } |
1018 | | |
1019 | | /// Make sure that we can parse random patterns built from a small, selected |
1020 | | /// set of characters. Some of these might be valid, some might not. But none |
1021 | | /// of these should be able to crash the parser in any way. |
1022 | 1 | test_result test_parser_parse_random_selected(void) { |
1023 | 1 | // How many random patterns to generate. |
1024 | 1 | size_t iterations = 10000; |
1025 | 1 | // Characters to choose from. Must be zero-terminated for `strlen` to work |
1026 | 1 | // on it. |
1027 | 1 | const char characters[] = "()[]|{},.abcdefghijklmnopqrstuvw0123456789\\"; |
1028 | 1 | // Find out how many possible characters there are. |
1029 | 1 | size_t characters_len = strlen(characters); |
1030 | 1 | // Maximum length of the string to try parsing. |
1031 | 1 | size_t string_length = 16; |
1032 | 1 | // Storage for the string plus NUL terminator. |
1033 | 1 | char string[string_length + 1]; |
1034 | 1 | // Source of randomness. |
1035 | 1 | passgen_random *random = passgen_random_new(NULL); |
1036 | 1 | |
1037 | 1 | // Generate random strings and parse them. |
1038 | 10.0k | for(size_t i = 0; i < iterations; i++10.0k ) { |
1039 | 10.0k | // Determine length of random string. |
1040 | 10.0k | size_t length = passgen_random_u8_max(random, string_length); |
1041 | 10.0k | // NUL-terminate the string. |
1042 | 10.0k | string[length] = 0; |
1043 | 10.0k | // Generate random characters. |
1044 | 85.1k | for(size_t c = 0; c < length; c++75.1k ) { |
1045 | 75.1k | string[c] = |
1046 | 75.1k | characters[passgen_random_u8_max(random, characters_len)]; |
1047 | 75.1k | } |
1048 | 10.0k | |
1049 | 10.0k | string[length] = 0; |
1050 | 10.0k | |
1051 | 10.0k | // Parse the string. |
1052 | 10.0k | passgen_pattern pattern; |
1053 | 10.0k | passgen_error error; |
1054 | 10.0k | passgen_parse(&pattern, &error, string); |
1055 | 10.0k | passgen_pattern_free(&pattern); |
1056 | 10.0k | } |
1057 | 1 | |
1058 | 1 | passgen_random_free(random); |
1059 | 1 | |
1060 | 1 | return test_ok; |
1061 | 1 | } |
1062 | | |
1063 | | /// Make sure that we can parse random patterns made of printable ASCII |
1064 | | /// characters. Some of these might be valid, some might not. But none of these |
1065 | | /// should be able to crash the parser in any way. |
1066 | 1 | test_result test_parser_parse_random_ascii_printable(void) { |
1067 | 1 | size_t iterations = 10000; |
1068 | 1 | size_t string_length = 16; |
1069 | 1 | char string[string_length + 1]; |
1070 | 1 | passgen_random *random = passgen_random_new(NULL); |
1071 | 1 | |
1072 | 10.0k | for(size_t i = 0; i < iterations; i++10.0k ) { |
1073 | 170k | for(size_t c = 0; c < string_length; c++160k ) { |
1074 | 160k | string[c] = 33 + passgen_random_u8_max(random, 93); |
1075 | 160k | } |
1076 | 10.0k | string[string_length] = 0; |
1077 | 10.0k | |
1078 | 10.0k | // Parse the string. |
1079 | 10.0k | passgen_pattern pattern; |
1080 | 10.0k | passgen_error error; |
1081 | 10.0k | passgen_parse(&pattern, &error, string); |
1082 | 10.0k | passgen_pattern_free(&pattern); |
1083 | 10.0k | } |
1084 | 1 | |
1085 | 1 | passgen_random_free(random); |
1086 | 1 | |
1087 | 1 | return test_ok; |
1088 | 1 | } |
1089 | | |
1090 | 1 | test_result test_parser_parse_random_unicode(void) { |
1091 | 1 | size_t iterations = 10000; |
1092 | 1 | size_t string_length = 16; |
1093 | 1 | uint32_t string[string_length]; |
1094 | 1 | passgen_random *random = passgen_random_new(NULL); |
1095 | 1 | |
1096 | 10.0k | for(size_t i = 0; i < iterations; i++10.0k ) { |
1097 | 170k | for(size_t c = 0; c < string_length; c++160k ) { |
1098 | 160k | string[c] = |
1099 | 160k | 33 + passgen_random_u32_max(random, PASSGEN_UNICODE_MAX); |
1100 | 160k | } |
1101 | 10.0k | |
1102 | 10.0k | // Parse the string. |
1103 | 10.0k | passgen_parser parser; |
1104 | 10.0k | passgen_pattern pattern; |
1105 | 10.0k | passgen_parser_init(&parser, &pattern); |
1106 | 10.0k | passgen_parser_unicode(&parser, string, string_length); |
1107 | 10.0k | passgen_pattern_free(parser.pattern); |
1108 | 10.0k | passgen_parser_free(&parser); |
1109 | 10.0k | } |
1110 | 1 | |
1111 | 1 | passgen_random_free(random); |
1112 | 1 | |
1113 | 1 | return test_ok; |
1114 | 1 | } |