Line data Source code
1 : #include "passgen/parser/parser.h"
2 : #include "passgen/container/stack.h"
3 : #include "passgen/parser/token.h"
4 : #include "passgen/pattern/group.h"
5 : #include "passgen/pattern/literal.h"
6 : #include "passgen/pattern/pattern.h"
7 : #include "passgen/pattern/range.h"
8 : #include "passgen/pattern/repeat.h"
9 : #include "passgen/pattern/segment.h"
10 : #include "passgen/pattern/segment_item.h"
11 : #include "passgen/pattern/set.h"
12 : #include "passgen/random.h"
13 : #include "passgen/util/utf8.h"
14 : #include "tests.h"
15 : #include <passgen/passgen.h>
16 : #include <stdlib.h>
17 :
// Common test setup. Declares the parser, a zero-initialised token parser and
// token, scratch `segment`/`item` pointers and the output pattern, then
// initialises the parser to build into `parsed_pattern`. The trailing (void)
// casts reference `item` and `segment`, presumably so that tests which never
// touch them still build without unused-variable warnings.
// Declares variables, so it must be the first statement of the test body.
18 : #define PREAMBLE() \
19 : passgen_parser parser; \
20 : passgen_token_parser token_parser = {0}; \
21 : passgen_token token = {0}; \
22 : passgen_pattern_segment *segment; \
23 : passgen_pattern_item *item; \
24 : passgen_pattern parsed_pattern; \
25 : passgen_parser_init(&parser, &parsed_pattern); \
26 : (void) item; \
27 : (void) segment
28 :
// Common test teardown. Asserts that finishing the parse succeeds (returns 0),
// then releases the parser and the pattern it produced.
// Note: the expansion already ends in `;`, so `POSTAMBLE();` yields an extra
// empty statement, and the macro expands to several statements (do not use it
// as the body of an unbraced `if`/`for`).
29 : #define POSTAMBLE() \
30 : assert_eq(0, passgen_parse_finish(&parser)); \
31 : passgen_parser_free(&parser); \
32 : passgen_pattern_free(&parsed_pattern);
33 :
// Feeds a single codepoint through the token parser, asserts that the token
// parser reports PASSGEN_TOKEN_INIT (presumably: a complete token was produced
// and the tokenizer is back in its initial state), then hands the token to the
// pattern parser and asserts that it is accepted (returns 0).
// Relies on the `token_parser`, `token` and `parser` locals from PREAMBLE().
// Expands to two statements, so it is not safe as an unbraced `if`/`for` body.
34 : #define PARSE_CODEPOINT(codepoint) \
35 : assert( \
36 : passgen_token_parse(&token_parser, &token, 1, codepoint) == \
37 : PASSGEN_TOKEN_INIT); \
38 : assert_eq(0, passgen_parse_token(&parser, &token))
39 :
// Feeds a two-codepoint sequence (used by the tests for backslash escapes).
// The first codepoint must leave the token parser with a positive result
// (presumably: token still incomplete); the second must complete the token
// (PASSGEN_TOKEN_INIT). The resulting single token is then passed to the
// pattern parser, which must accept it (return 0).
// Expands to three statements, so it is not safe as an unbraced `if`/`for` body.
40 : #define PARSE_CODEPOINT_DOUBLE(a, b) \
41 : assert(passgen_token_parse(&token_parser, &token, 1, a) > 0); \
42 : assert( \
43 : passgen_token_parse(&token_parser, &token, 1, b) == \
44 : PASSGEN_TOKEN_INIT); \
45 : assert_eq(0, passgen_parse_token(&parser, &token))
46 :
/// A parser that has not been fed any token exposes a pattern whose root group
/// has exactly one segment, and that segment has no items.
47 1 : test_result test_parser_empty(void) {
48 1 : PREAMBLE();
// no tokens are parsed in this test, so reference the otherwise unused locals
49 : (void) item;
50 : (void) token;
51 : (void) token_parser;
52 :
53 : // single empty segment
54 1 : assert(1 == parser.pattern->group.segments.len);
55 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
56 1 : assert(0 == segment->items.len);
57 :
58 1 : POSTAMBLE();
59 :
60 1 : return test_ok;
61 : }
62 :
/// Parses "({5}a|bc|{3}c)" one codepoint at a time and checks, after each
/// step, the number of segments in the group, each segment's multiplier and
/// the group's multiplier_sum. A `{n}` at the start of a segment sets that
/// segment's multiplier; a segment opened by `|` starts with multiplier 1.
63 1 : test_result test_parser_segment_multiplier(void) {
64 1 : PREAMBLE();
65 1 : PARSE_CODEPOINT('(');
66 :
// `item` is the group that was just opened (first item of the root segment)
67 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
68 1 : item = passgen_pattern_segment_get_item(segment, 0);
69 :
// "{5}" at the start of the first segment sets its multiplier to 5
70 1 : PARSE_CODEPOINT('{');
71 1 : PARSE_CODEPOINT('5');
72 1 : PARSE_CODEPOINT('}');
73 :
74 1 : assert_eq(item->data.group.segments.len, 1);
75 1 : assert_eq(item->data.group.multiplier_sum, 5);
76 1 : segment = passgen_pattern_group_segment_get(&item->data.group, 0);
77 1 : assert_eq(segment->multiplier, 5);
78 :
79 1 : PARSE_CODEPOINT('a');
80 :
81 1 : assert_eq(segment->items.len, 1);
82 :
// '|' opens a second segment with the default multiplier 1 (sum 5 + 1)
83 1 : PARSE_CODEPOINT('|');
84 :
85 1 : assert_eq(item->data.group.segments.len, 2);
86 1 : assert_eq(item->data.group.multiplier_sum, 6);
87 1 : segment = passgen_pattern_group_segment_get(&item->data.group, 1);
88 1 : assert_eq(segment->multiplier, 1);
89 :
90 1 : PARSE_CODEPOINT('b');
91 1 : PARSE_CODEPOINT('c');
92 :
93 : // one item (two codepoints in a single literal)
94 1 : assert_eq(segment->items.len, 1);
95 :
// third segment: starts at multiplier 1 (sum 7) ...
96 1 : PARSE_CODEPOINT('|');
97 :
98 1 : assert_eq(item->data.group.segments.len, 3);
99 1 : assert_eq(item->data.group.multiplier_sum, 7);
100 1 : segment = passgen_pattern_group_segment_get(&item->data.group, 2);
101 1 : assert_eq(segment->multiplier, 1);
102 :
// ... and "{3}" raises it to 3, so the sum becomes 5 + 1 + 3
103 1 : PARSE_CODEPOINT('{');
104 1 : PARSE_CODEPOINT('3');
105 1 : PARSE_CODEPOINT('}');
106 :
107 1 : assert_eq(item->data.group.multiplier_sum, 9);
108 1 : assert_eq(segment->multiplier, 3);
109 :
110 1 : PARSE_CODEPOINT('c');
111 :
112 1 : assert_eq(segment->items.len, 1);
113 :
// closing the group must leave the multiplier sum untouched
114 1 : PARSE_CODEPOINT(')');
115 :
116 1 : assert_eq(item->data.group.multiplier_sum, 9);
117 :
118 1 : POSTAMBLE();
119 :
120 1 : return test_ok;
121 : }
122 :
123 : /// When parsing a group, segments that have a zero multiplier are dropped.
/// Feeds "(" followed by 100 repetitions of "{0}|" and a final "{0})". The
/// group must never hold more than one segment while it is open, and must hold
/// no segments once it has been closed.
124 1 : test_result test_parser_skip_zero_segment(void) {
125 1 : PREAMBLE();
126 1 : PARSE_CODEPOINT('(');
127 :
// `item` is the group that was just opened; it starts with one segment
128 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
129 1 : item = passgen_pattern_segment_get_item(segment, 0);
130 1 : assert_eq(item->data.group.segments.len, 1);
131 :
132 101 : for(size_t i = 0; i < 100; i++) {
133 100 : PARSE_CODEPOINT('{');
134 100 : PARSE_CODEPOINT('0');
135 100 : PARSE_CODEPOINT('}');
136 100 : PARSE_CODEPOINT('|');
137 :
138 : // because the multiplier is zero, this segment is removed
139 100 : assert_eq(item->data.group.segments.len, 1);
140 : }
141 :
142 1 : PARSE_CODEPOINT('{');
143 1 : PARSE_CODEPOINT('0');
144 1 : PARSE_CODEPOINT('}');
145 1 : PARSE_CODEPOINT(')');
146 :
147 : // final segment is also removed, leaving no segments
148 1 : assert_eq(item->data.group.segments.len, 0);
149 :
150 1 : POSTAMBLE();
151 1 : return test_ok;
152 : }
153 :
/// Parses "()" followed by 100 more "()" pairs. Opening a group adds a group
/// item with one (empty) segment to the root segment. After every closing
/// parenthesis the root group must still have one segment holding exactly one
/// item, no matter how many empty groups have been parsed.
/// NOTE(review): the item count staying at 1 suggests that empty groups are
/// discarded or reused by the parser; the test only pins the count.
154 1 : test_result test_parser_empty_group(void) {
155 1 : PREAMBLE();
156 1 : PARSE_CODEPOINT('(');
157 :
158 1 : assert(1 == parser.pattern->group.segments.len);
159 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
160 1 : assert(segment);
161 1 : assert(1 == segment->items.len);
162 :
163 : // group with one empty segment
164 1 : item = passgen_pattern_segment_get_item(segment, 0);
165 1 : assert(item);
166 1 : assert(item->kind == PASSGEN_PATTERN_GROUP);
167 1 : assert_eq(item->data.group.segments.len, 1);
168 :
169 1 : PARSE_CODEPOINT(')');
170 :
// closing the group leaves the root layout unchanged
171 1 : assert(1 == parser.pattern->group.segments.len);
172 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
173 1 : assert(segment);
174 1 : assert(1 == segment->items.len);
175 :
// further empty groups must not increase the item count of the root segment
176 101 : for(size_t i = 0; i < 100; i++) {
177 100 : PARSE_CODEPOINT('(');
178 100 : PARSE_CODEPOINT(')');
179 :
180 100 : assert(1 == parser.pattern->group.segments.len);
181 100 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
182 100 : assert(segment);
183 100 : assert(1 == segment->items.len);
184 : }
185 :
186 1 : POSTAMBLE();
187 1 : return test_ok;
188 : }
189 :
/// Parses "a" and expects a single segment holding one literal item whose only
/// codepoint is 'a'.
190 1 : test_result test_parser_single_char(void) {
191 1 : PREAMBLE();
192 1 : PARSE_CODEPOINT('a');
193 :
194 : // single segment containing char 'a'
195 1 : assert(1 == parser.pattern->group.segments.len);
196 :
197 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
198 1 : assert(segment);
199 1 : assert(1 == segment->items.len);
200 :
201 1 : item = passgen_pattern_segment_get_item(segment, 0);
202 1 : assert(item);
203 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
204 1 : assert(item->data.literal.codepoints[0] == 'a');
205 1 : assert(item->data.literal.count == 1);
206 :
207 1 : POSTAMBLE();
208 :
209 1 : return test_ok;
210 : }
211 :
/// Parses "ab" and expects the two consecutive characters to be merged into a
/// single literal item with two codepoints.
212 1 : test_result test_parser_multi_char(void) {
213 1 : PREAMBLE();
214 1 : PARSE_CODEPOINT('a');
215 1 : PARSE_CODEPOINT('b');
216 :
217 : // single segment containing one literal item "ab"
218 1 : assert(1 == parser.pattern->group.segments.len);
219 :
220 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
221 1 : assert(segment);
222 1 : assert(1 == segment->items.len);
223 :
224 1 : item = passgen_pattern_segment_get_item(segment, 0);
225 1 : assert(item);
226 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
227 1 : assert(item->data.literal.codepoints[0] == 'a');
228 1 : assert(item->data.literal.codepoints[1] == 'b');
229 1 : assert(item->data.literal.count == 2);
230 :
231 1 : POSTAMBLE();
232 :
233 1 : return test_ok;
234 : }
235 :
/// Parses "a|b" and expects the root group to be split into two segments, each
/// holding one single-codepoint literal ('a' and 'b' respectively).
236 1 : test_result test_parser_multi_groups(void) {
237 1 : PREAMBLE();
238 1 : PARSE_CODEPOINT('a');
239 1 : PARSE_CODEPOINT('|');
240 1 : PARSE_CODEPOINT('b');
241 :
242 1 : assert(2 == parser.pattern->group.segments.len);
243 :
// first segment: literal 'a'
244 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
245 1 : assert(1 == segment->items.len);
246 :
247 1 : item = passgen_pattern_segment_get_item(segment, 0);
248 1 : assert(item);
249 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
250 1 : assert(item->data.literal.codepoints[0] == 'a');
251 1 : assert(item->data.literal.count == 1);
252 :
// second segment: literal 'b'
253 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 1);
254 1 : assert(1 == segment->items.len);
255 :
256 1 : item = passgen_pattern_segment_get_item(segment, 0);
257 1 : assert(item);
258 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
259 1 : assert(item->data.literal.codepoints[0] == 'b');
260 1 : assert(item->data.literal.count == 1);
261 :
262 1 : POSTAMBLE();
263 :
264 1 : return test_ok;
265 : }
266 :
/// Parses "(a)" and expects the root segment to hold one group item, which in
/// turn has one segment holding the literal 'a' with the default repeat of
/// exactly once (min == max == 1).
267 1 : test_result test_parser_nested_groups(void) {
268 1 : PREAMBLE();
269 1 : PARSE_CODEPOINT('(');
270 1 : PARSE_CODEPOINT('a');
271 1 : PARSE_CODEPOINT(')');
272 :
273 1 : assert(1 == parser.pattern->group.segments.len);
274 :
275 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
276 1 : assert(1 == segment->items.len);
277 :
// outer item: the group
278 1 : item = passgen_pattern_segment_get_item(segment, 0);
279 1 : assert(item);
280 1 : assert(item->kind == PASSGEN_PATTERN_GROUP);
281 :
282 1 : assert(1 == item->data.group.segments.len);
283 :
// descend into the group's only segment
284 1 : segment = passgen_pattern_group_segment_get(&item->data.group, 0);
285 1 : assert(1 == segment->items.len);
286 :
// inner item: the literal 'a'
287 1 : item = passgen_pattern_segment_get_item(segment, 0);
288 1 : assert(item);
289 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
290 1 : assert(item->data.literal.codepoints[0] == 'a');
291 1 : assert(item->data.literal.count == 1);
292 1 : assert(item->repeat.min == 1);
293 1 : assert(item->repeat.max == 1);
294 :
295 1 : POSTAMBLE();
296 :
297 1 : return test_ok;
298 : }
299 :
/// With `parser.limit` set to 5, four nested '(' are accepted but a fifth one
/// is rejected, and finishing the parse afterwards reports an error as well.
300 1 : test_result test_parser_depth_limit(void) {
301 1 : PREAMBLE();
302 1 : parser.limit = 5;
303 1 : PARSE_CODEPOINT('(');
304 1 : PARSE_CODEPOINT('(');
305 1 : PARSE_CODEPOINT('(');
306 1 : PARSE_CODEPOINT('(');
// `token` still holds the '(' produced by the previous PARSE_CODEPOINT, so
// this submits a fifth opening parenthesis, which must be refused
307 1 : assert(0 != passgen_parse_token(&parser, &token));
308 :
// manual teardown instead of POSTAMBLE(): here finishing is expected to fail
309 1 : assert(0 != passgen_parse_finish(&parser));
310 1 : passgen_parser_free(&parser);
311 1 : passgen_pattern_free(&parsed_pattern);
312 :
313 1 : return test_ok;
314 : }
315 :
/// Parses "(a)(b)" and expects the root segment to hold two group items, each
/// with a single segment containing one literal ('a' and 'b' respectively).
316 1 : test_result test_parser_multi_nested_groups(void) {
317 1 : PREAMBLE();
318 1 : PARSE_CODEPOINT('(');
319 1 : PARSE_CODEPOINT('a');
320 1 : PARSE_CODEPOINT(')');
321 1 : PARSE_CODEPOINT('(');
322 1 : PARSE_CODEPOINT('b');
323 1 : PARSE_CODEPOINT(')');
324 :
325 1 : assert(1 == parser.pattern->group.segments.len);
326 :
327 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
328 1 : assert(2 == segment->items.len);
329 :
// first group: "(a)"
330 1 : item = passgen_pattern_segment_get_item(segment, 0);
331 1 : assert(item);
332 1 : assert(item->kind == PASSGEN_PATTERN_GROUP);
333 :
334 1 : assert(1 == item->data.group.segments.len);
335 :
336 1 : segment = passgen_pattern_group_segment_get(&item->data.group, 0);
337 1 : assert(1 == segment->items.len);
338 :
339 1 : item = passgen_pattern_segment_get_item(segment, 0);
340 1 : assert(item);
341 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
342 1 : assert(item->data.literal.codepoints[0] == 'a');
343 1 : assert(item->data.literal.count == 1);
344 :
// back to the root segment for the second group: "(b)"
345 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
346 1 : item = passgen_pattern_segment_get_item(segment, 1);
347 1 : assert(item);
348 1 : assert(item->kind == PASSGEN_PATTERN_GROUP);
349 :
350 1 : assert(1 == item->data.group.segments.len);
351 :
352 1 : segment = passgen_pattern_group_segment_get(&item->data.group, 0);
353 1 : assert(1 == segment->items.len);
354 :
355 1 : item = passgen_pattern_segment_get_item(segment, 0);
356 1 : assert(item);
357 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
358 1 : assert(item->data.literal.codepoints[0] == 'b');
359 1 : assert(item->data.literal.count == 1);
360 :
361 1 : POSTAMBLE();
362 :
363 1 : return test_ok;
364 : }
365 :
/// Parses "[ab]" and expects one set item made of two single-character
/// ranges, 'a'..'a' and 'b'..'b'.
366 1 : test_result test_parser_set_simple(void) {
367 1 : PREAMBLE();
368 1 : PARSE_CODEPOINT('[');
369 1 : PARSE_CODEPOINT('a');
370 1 : PARSE_CODEPOINT('b');
371 1 : PARSE_CODEPOINT(']');
372 : passgen_pattern_range *range;
373 :
374 : // single segment containing one set item
375 1 : assert(1 == parser.pattern->group.segments.len);
376 :
377 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
378 1 : assert(segment);
379 1 : assert(1 == segment->items.len);
380 :
381 1 : item = passgen_pattern_segment_get_item(segment, 0);
382 1 : assert(item);
383 1 : assert(item->kind == PASSGEN_PATTERN_SET);
384 :
385 1 : assert(item->data.set.items.len == 2);
386 :
// a lone character is stored as a range whose start equals its end
387 1 : range = passgen_pattern_set_range_get(&item->data.set, 0);
388 1 : assert(range);
389 1 : assert(range->start == 'a');
390 1 : assert(range->end == 'a');
391 :
392 1 : range = passgen_pattern_set_range_get(&item->data.set, 1);
393 1 : assert(range);
394 1 : assert(range->start == 'b');
395 1 : assert(range->end == 'b');
396 :
397 1 : POSTAMBLE();
398 :
399 1 : return test_ok;
400 : }
401 :
/// Parses "[a\-b]" and expects the escaped '-' to be taken as an ordinary set
/// member rather than a range operator: the set holds three single-character
/// ranges, 'a', '-' and 'b'.
402 1 : test_result test_parser_set_simple_escaped(void) {
403 1 : PREAMBLE();
404 1 : PARSE_CODEPOINT('[');
405 1 : PARSE_CODEPOINT('a');
406 1 : PARSE_CODEPOINT_DOUBLE('\\', '-');
407 1 : PARSE_CODEPOINT('b');
408 1 : PARSE_CODEPOINT(']');
409 : passgen_pattern_range *range;
410 :
411 : // single segment containing one set item
412 1 : assert(1 == parser.pattern->group.segments.len);
413 :
414 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
415 1 : assert(segment);
416 1 : assert(1 == segment->items.len);
417 :
418 1 : item = passgen_pattern_segment_get_item(segment, 0);
419 1 : assert(item);
420 1 : assert(item->kind == PASSGEN_PATTERN_SET);
421 :
422 1 : assert(item->data.set.items.len == 3);
423 :
424 1 : range = passgen_pattern_set_range_get(&item->data.set, 0);
425 1 : assert(range);
426 1 : assert(range->start == 'a');
427 1 : assert(range->end == 'a');
428 :
// the escaped dash is a member of its own
429 1 : range = passgen_pattern_set_range_get(&item->data.set, 1);
430 1 : assert(range);
431 1 : assert(range->start == '-');
432 1 : assert(range->end == '-');
433 :
434 1 : range = passgen_pattern_set_range_get(&item->data.set, 2);
435 1 : assert(range);
436 1 : assert(range->start == 'b');
437 1 : assert(range->end == 'b');
438 :
439 1 : POSTAMBLE();
440 :
441 1 : return test_ok;
442 : }
443 :
/// Parses "[a-b]" and expects one set item holding a single range from 'a' to
/// 'b'.
444 1 : test_result test_parser_range_simple(void) {
445 1 : PREAMBLE();
446 1 : PARSE_CODEPOINT('[');
447 1 : PARSE_CODEPOINT('a');
448 1 : PARSE_CODEPOINT('-');
449 1 : PARSE_CODEPOINT('b');
450 1 : PARSE_CODEPOINT(']');
451 : passgen_pattern_range *range;
452 :
453 : // single segment containing one set item
454 1 : assert(1 == parser.pattern->group.segments.len);
455 :
456 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
457 1 : assert(segment);
458 1 : assert(1 == segment->items.len);
459 :
460 1 : item = passgen_pattern_segment_get_item(segment, 0);
461 1 : assert(item);
462 1 : assert(item->kind == PASSGEN_PATTERN_SET);
463 :
464 1 : assert(item->data.set.items.len == 1);
465 :
466 1 : range = passgen_pattern_set_range_get(&item->data.set, 0);
467 1 : assert(range);
468 1 : assert(range->start == 'a');
469 1 : assert(range->end == 'b');
470 :
471 1 : POSTAMBLE();
472 :
473 1 : return test_ok;
474 : }
475 :
/// Parses "[a-bc-d]" and expects one set item holding two ranges, 'a'..'b'
/// and 'c'..'d'.
476 1 : test_result test_parser_range_multiple(void) {
477 1 : PREAMBLE();
478 1 : PARSE_CODEPOINT('[');
479 1 : PARSE_CODEPOINT('a');
480 1 : PARSE_CODEPOINT('-');
481 1 : PARSE_CODEPOINT('b');
482 1 : PARSE_CODEPOINT('c');
483 1 : PARSE_CODEPOINT('-');
484 1 : PARSE_CODEPOINT('d');
485 1 : PARSE_CODEPOINT(']');
486 : passgen_pattern_range *range;
487 :
488 : // single segment containing one set item
489 1 : assert(1 == parser.pattern->group.segments.len);
490 :
491 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
492 1 : assert(segment);
493 1 : assert(1 == segment->items.len);
494 :
495 1 : item = passgen_pattern_segment_get_item(segment, 0);
496 1 : assert(item);
497 1 : assert(item->kind == PASSGEN_PATTERN_SET);
498 :
499 1 : assert(item->data.set.items.len == 2);
500 :
501 1 : range = passgen_pattern_set_range_get(&item->data.set, 0);
502 1 : assert(range);
503 1 : assert(range->start == 'a');
504 1 : assert(range->end == 'b');
505 :
506 1 : range = passgen_pattern_set_range_get(&item->data.set, 1);
507 1 : assert(range);
508 1 : assert(range->start == 'c');
509 1 : assert(range->end == 'd');
510 :
511 1 : POSTAMBLE();
512 :
513 1 : return test_ok;
514 : }
515 :
/// Parses "a{2}" and expects a single literal 'a' whose repeat is exactly two
/// (min == max == 2).
516 1 : test_result test_parser_char_repeat(void) {
517 1 : PREAMBLE();
518 1 : PARSE_CODEPOINT('a');
519 1 : PARSE_CODEPOINT('{');
520 1 : PARSE_CODEPOINT('2');
521 1 : PARSE_CODEPOINT('}');
522 :
523 : // single segment containing one literal item 'a'
524 1 : assert(1 == parser.pattern->group.segments.len);
525 :
526 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
527 1 : assert(segment);
528 1 : assert(1 == segment->items.len);
529 :
530 1 : item = passgen_pattern_segment_get_item(segment, 0);
531 1 : assert(item);
532 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
533 1 : assert(item->data.literal.codepoints[0] == 'a');
534 1 : assert(item->data.literal.count == 1);
535 1 : assert(item->repeat.min == 2);
536 1 : assert(item->repeat.max == 2);
537 :
// NOTE(review): unlike the other passing tests, this one frees by hand and
// never asserts passgen_parse_finish() == 0 (what POSTAMBLE() does). This
// looks like an oversight rather than intent - confirm and switch to
// POSTAMBLE() if so.
538 1 : passgen_parser_free(&parser);
539 1 : passgen_pattern_free(&parsed_pattern);
540 :
541 1 : return test_ok;
542 : }
543 :
/// Parses "a{2,4}" and expects a single literal 'a' whose repeat spans from
/// two to four (min == 2, max == 4).
544 1 : test_result test_parser_char_repeat_range(void) {
545 1 : PREAMBLE();
546 1 : PARSE_CODEPOINT('a');
547 1 : PARSE_CODEPOINT('{');
548 1 : PARSE_CODEPOINT('2');
549 1 : PARSE_CODEPOINT(',');
550 1 : PARSE_CODEPOINT('4');
551 1 : PARSE_CODEPOINT('}');
552 :
553 : // single segment containing one literal item 'a'
554 1 : assert(1 == parser.pattern->group.segments.len);
555 :
556 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
557 1 : assert(segment);
558 1 : assert(1 == segment->items.len);
559 :
560 1 : item = passgen_pattern_segment_get_item(segment, 0);
561 1 : assert(item);
562 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
563 1 : assert(item->data.literal.codepoints[0] == 'a');
564 1 : assert(item->data.literal.count == 1);
565 1 : assert(item->repeat.min == 2);
566 1 : assert(item->repeat.max == 4);
567 :
568 1 : POSTAMBLE();
569 :
570 1 : return test_ok;
571 : }
572 :
/// Parses the escaped sequence "\(\{\[\|" and expects the four escaped
/// metacharacters to be treated as plain characters: one literal item with the
/// codepoints '(', '{', '[' and '|', repeated exactly once.
573 1 : test_result test_parser_group_ignore_escaped(void) {
574 1 : PREAMBLE();
575 1 : PARSE_CODEPOINT_DOUBLE('\\', '(');
576 1 : PARSE_CODEPOINT_DOUBLE('\\', '{');
577 1 : PARSE_CODEPOINT_DOUBLE('\\', '[');
578 1 : PARSE_CODEPOINT_DOUBLE('\\', '|');
579 :
// no group, set or extra segment may have been opened
580 1 : assert(1 == parser.pattern->group.segments.len);
581 :
582 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
583 1 : assert(segment);
584 1 : assert(1 == segment->items.len);
585 :
586 1 : item = passgen_pattern_segment_get_item(segment, 0);
587 1 : assert(item);
588 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
589 1 : assert(item->data.literal.codepoints[0] == '(');
590 1 : assert(item->data.literal.codepoints[1] == '{');
591 1 : assert(item->data.literal.codepoints[2] == '[');
592 1 : assert(item->data.literal.codepoints[3] == '|');
593 1 : assert(item->data.literal.count == 4);
594 1 : assert(item->repeat.min == 1);
595 1 : assert(item->repeat.max == 1);
596 :
597 1 : POSTAMBLE();
598 :
599 1 : return test_ok;
600 : }
601 :
/// Parses "aa?(a)(b)?" and checks how the optional marker `?` is applied. The
/// expected items are: literal 'a' (not optional), literal 'a' (optional),
/// group (not optional), group (optional). The `?` therefore applies only to
/// the character or group directly before it; the two leading 'a's end up in
/// separate literal items.
602 1 : test_result test_parser_item_maybe(void) {
603 1 : PREAMBLE();
604 1 : PARSE_CODEPOINT('a');
605 1 : PARSE_CODEPOINT('a');
606 1 : PARSE_CODEPOINT('?');
607 1 : PARSE_CODEPOINT('(');
608 1 : PARSE_CODEPOINT('a');
609 1 : PARSE_CODEPOINT(')');
610 1 : PARSE_CODEPOINT('(');
611 1 : PARSE_CODEPOINT('b');
612 1 : PARSE_CODEPOINT(')');
613 1 : PARSE_CODEPOINT('?');
614 :
615 : // single segment containing four items
616 1 : assert(1 == parser.pattern->group.segments.len);
617 :
618 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
619 1 : assert(segment);
620 1 : assert(4 == segment->items.len);
621 :
// first 'a': mandatory
622 1 : item = passgen_pattern_segment_get_item(segment, 0);
623 1 : assert(item);
624 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
625 1 : assert(item->data.literal.codepoints[0] == 'a');
626 1 : assert(item->data.literal.count == 1);
627 1 : assert(item->repeat.min == 1);
628 1 : assert(item->repeat.max == 1);
629 1 : assert(item->maybe == false);
630 :
// second 'a': made optional by the '?'
631 1 : item = passgen_pattern_segment_get_item(segment, 1);
632 1 : assert(item);
633 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
634 1 : assert(item->data.literal.codepoints[0] == 'a');
635 1 : assert(item->data.literal.count == 1);
636 1 : assert(item->repeat.min == 1);
637 1 : assert(item->repeat.max == 1);
638 1 : assert(item->maybe == true);
639 :
// "(a)": mandatory group
640 1 : item = passgen_pattern_segment_get_item(segment, 2);
641 1 : assert(item);
642 1 : assert(item->kind == PASSGEN_PATTERN_GROUP);
643 1 : assert(item->repeat.min == 1);
644 1 : assert(item->repeat.max == 1);
645 1 : assert(item->maybe == false);
646 :
// "(b)?": optional group
647 1 : item = passgen_pattern_segment_get_item(segment, 3);
648 1 : assert(item);
649 1 : assert(item->kind == PASSGEN_PATTERN_GROUP);
650 1 : assert(item->repeat.min == 1);
651 1 : assert(item->repeat.max == 1);
652 1 : assert(item->maybe == true);
653 :
654 1 : POSTAMBLE();
655 :
656 1 : return test_ok;
657 : }
658 :
/// Parses "\m{english}" and expects a single special item of kind
/// PASSGEN_PATTERN_SPECIAL_MARKOV that is mandatory and repeated exactly once.
659 1 : test_result test_parser_special_pronounceable(void) {
660 1 : PREAMBLE();
661 1 : PARSE_CODEPOINT_DOUBLE('\\', 'm');
662 1 : PARSE_CODEPOINT('{');
663 1 : PARSE_CODEPOINT('e');
664 1 : PARSE_CODEPOINT('n');
665 1 : PARSE_CODEPOINT('g');
666 1 : PARSE_CODEPOINT('l');
667 1 : PARSE_CODEPOINT('i');
668 1 : PARSE_CODEPOINT('s');
669 1 : PARSE_CODEPOINT('h');
670 1 : PARSE_CODEPOINT('}');
671 :
// after the closing '}' a single entry is left on the parser's state stack
// (presumably the root group state - TODO confirm)
672 1 : assert(parser.state.len == 1);
673 :
674 1 : assert(1 == parser.pattern->group.segments.len);
675 :
676 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
677 1 : assert(segment);
678 1 : assert(1 == segment->items.len);
679 :
680 1 : item = passgen_pattern_segment_get_item(segment, 0);
681 1 : assert(item);
682 1 : assert(item->kind == PASSGEN_PATTERN_SPECIAL);
683 1 : assert(item->repeat.min == 1);
684 1 : assert(item->repeat.max == 1);
685 1 : assert(item->maybe == false);
686 1 : assert(item->data.special.kind == PASSGEN_PATTERN_SPECIAL_MARKOV);
687 :
688 1 : POSTAMBLE();
689 :
690 1 : return test_ok;
691 : }
692 :
/// Parses "a\m{english}" and expects two items in the single root segment: the
/// literal 'a' followed by a special item of kind
/// PASSGEN_PATTERN_SPECIAL_MARKOV, both mandatory and repeated exactly once.
693 1 : test_result test_parser_mixed_special(void) {
694 1 : PREAMBLE();
695 1 : PARSE_CODEPOINT('a');
696 1 : PARSE_CODEPOINT_DOUBLE('\\', 'm');
697 1 : PARSE_CODEPOINT('{');
698 1 : PARSE_CODEPOINT('e');
699 1 : PARSE_CODEPOINT('n');
700 1 : PARSE_CODEPOINT('g');
701 1 : PARSE_CODEPOINT('l');
702 1 : PARSE_CODEPOINT('i');
703 1 : PARSE_CODEPOINT('s');
704 1 : PARSE_CODEPOINT('h');
705 1 : PARSE_CODEPOINT('}');
706 :
// after the closing '}' a single entry is left on the parser's state stack
// (presumably the root group state - TODO confirm)
707 1 : assert(parser.state.len == 1);
708 :
709 1 : assert(1 == parser.pattern->group.segments.len);
710 :
711 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
712 1 : assert(segment);
713 1 : assert(2 == segment->items.len);
714 :
// item 0: the plain literal 'a'
715 1 : item = passgen_pattern_segment_get_item(segment, 0);
716 1 : assert(item);
717 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
718 1 : assert(item->data.literal.codepoints[0] == 'a');
719 1 : assert(item->data.literal.count == 1);
720 1 : assert(item->repeat.min == 1);
721 1 : assert(item->repeat.max == 1);
722 1 : assert(item->maybe == false);
723 :
// item 1: the special item
724 1 : item = passgen_pattern_segment_get_item(segment, 1);
725 1 : assert(item);
726 1 : assert(item->kind == PASSGEN_PATTERN_SPECIAL);
727 1 : assert(item->repeat.min == 1);
728 1 : assert(item->repeat.max == 1);
729 1 : assert(item->maybe == false);
730 1 : assert(item->data.special.kind == PASSGEN_PATTERN_SPECIAL_MARKOV);
731 :
732 1 : POSTAMBLE();
733 :
734 1 : return test_ok;
735 : }
736 :
/// Parses "abc?d" and expects three literal items: "ab" (untainted), "c"
/// (tainted, optional) and "d" (untainted). The `?` splits 'c' off the
/// preceding literal, and the following 'd' starts a new literal instead of
/// being appended to the tainted one.
737 1 : test_result test_parser_char_maybe_char(void) {
738 1 : PREAMBLE();
739 1 : PARSE_CODEPOINT('a');
740 1 : PARSE_CODEPOINT('b');
741 1 : PARSE_CODEPOINT('c');
742 1 : PARSE_CODEPOINT('?');
743 1 : PARSE_CODEPOINT('d');
744 :
745 1 : assert(parser.state.len == 1);
746 :
747 1 : assert(1 == parser.pattern->group.segments.len);
748 :
749 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
750 1 : assert(segment);
751 1 : assert(3 == segment->items.len);
752 :
// "ab": the characters before the one affected by '?'
753 1 : item = passgen_pattern_segment_get_item(segment, 0);
754 1 : assert(item);
755 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
756 1 : assert(item->data.literal.count == 2);
757 1 : assert(item->data.literal.codepoints[0] == 'a');
758 1 : assert(item->data.literal.codepoints[1] == 'b');
759 1 : assert(item->data.literal.tainted == false);
760 1 : assert(item->repeat.min == 1);
761 1 : assert(item->repeat.max == 1);
762 1 : assert(item->maybe == false);
763 :
// "c?": carries the modifier and is marked tainted
764 1 : item = passgen_pattern_segment_get_item(segment, 1);
765 1 : assert(item);
766 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
767 1 : assert(item->data.literal.count == 1);
768 1 : assert(item->data.literal.codepoints[0] == 'c');
769 1 : assert(item->data.literal.tainted == true);
770 1 : assert(item->repeat.min == 1);
771 1 : assert(item->repeat.max == 1);
772 1 : assert(item->maybe == true);
773 :
// "d": a fresh literal after the tainted one
774 1 : item = passgen_pattern_segment_get_item(segment, 2);
775 1 : assert(item);
776 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
777 1 : assert(item->data.literal.count == 1);
778 1 : assert(item->data.literal.codepoints[0] == 'd');
779 1 : assert(item->data.literal.tainted == false);
780 1 : assert(item->repeat.min == 1);
781 1 : assert(item->repeat.max == 1);
782 1 : assert(item->maybe == false);
783 :
784 1 : POSTAMBLE();
785 :
786 1 : return test_ok;
787 : }
788 :
/// Parses "a{3}bcdefghijk{2}?ab" and checks how literals are split around
/// modifiers. Expected items, in order: "a" with repeat 3 (tainted), "bcdefgh",
/// "ij", "k" with repeat 2 and optional (tainted), and "ab".
/// NOTE(review): the run "bcdefghij" is expected to split after seven
/// codepoints; presumably a literal item holds at most seven codepoints -
/// confirm against the literal definition.
789 1 : test_result test_parser_char_repeat_tainted(void) {
790 1 : PREAMBLE();
791 1 : PARSE_CODEPOINT('a');
792 1 : PARSE_CODEPOINT('{');
793 1 : PARSE_CODEPOINT('3');
794 1 : PARSE_CODEPOINT('}');
795 1 : PARSE_CODEPOINT('b');
796 1 : PARSE_CODEPOINT('c');
797 1 : PARSE_CODEPOINT('d');
798 1 : PARSE_CODEPOINT('e');
799 1 : PARSE_CODEPOINT('f');
800 1 : PARSE_CODEPOINT('g');
801 1 : PARSE_CODEPOINT('h');
802 1 : PARSE_CODEPOINT('i');
803 1 : PARSE_CODEPOINT('j');
804 1 : PARSE_CODEPOINT('k');
805 1 : PARSE_CODEPOINT('{');
806 1 : PARSE_CODEPOINT('2');
807 1 : PARSE_CODEPOINT('}');
808 1 : PARSE_CODEPOINT('?');
809 1 : PARSE_CODEPOINT('a');
810 1 : PARSE_CODEPOINT('b');
811 :
812 1 : assert(parser.state.len == 1);
813 :
814 1 : assert(1 == parser.pattern->group.segments.len);
815 :
816 1 : segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
817 1 : assert(segment);
818 1 : assert(5 == segment->items.len);
819 :
// "a{3}": the repeat marks the literal as tainted
820 1 : item = passgen_pattern_segment_get_item(segment, 0);
821 1 : assert(item);
822 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
823 1 : assert(item->data.literal.count == 1);
824 1 : assert(item->data.literal.codepoints[0] == 'a');
825 1 : assert(item->data.literal.tainted == true);
826 1 : assert(item->repeat.min == 3);
827 1 : assert(item->repeat.max == 3);
828 1 : assert(item->maybe == false);
829 :
// "bcdefgh": first seven characters of the unmodified run
830 1 : item = passgen_pattern_segment_get_item(segment, 1);
831 1 : assert(item);
832 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
833 1 : assert(item->data.literal.count == 7);
834 1 : assert(item->data.literal.codepoints[0] == 'b');
835 1 : assert(item->data.literal.codepoints[1] == 'c');
836 1 : assert(item->data.literal.codepoints[2] == 'd');
837 1 : assert(item->data.literal.codepoints[3] == 'e');
838 1 : assert(item->data.literal.codepoints[4] == 'f');
839 1 : assert(item->data.literal.codepoints[5] == 'g');
840 1 : assert(item->data.literal.codepoints[6] == 'h');
841 1 : assert(item->data.literal.tainted == false);
842 1 : assert(item->repeat.min == 1);
843 1 : assert(item->repeat.max == 1);
844 1 : assert(item->maybe == false);
845 :
// "ij": remainder of the run, minus the 'k' that takes the modifiers
846 1 : item = passgen_pattern_segment_get_item(segment, 2);
847 1 : assert(item);
848 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
849 1 : assert(item->data.literal.count == 2);
850 1 : assert(item->data.literal.codepoints[0] == 'i');
851 1 : assert(item->data.literal.codepoints[1] == 'j');
852 1 : assert(item->data.literal.tainted == false);
853 1 : assert(item->repeat.min == 1);
854 1 : assert(item->repeat.max == 1);
855 1 : assert(item->maybe == false);
856 :
// "k{2}?": repeated twice and optional
857 1 : item = passgen_pattern_segment_get_item(segment, 3);
858 1 : assert(item);
859 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
860 1 : assert(item->data.literal.count == 1);
861 1 : assert(item->data.literal.codepoints[0] == 'k');
862 1 : assert(item->data.literal.tainted == true);
863 1 : assert(item->repeat.min == 2);
864 1 : assert(item->repeat.max == 2);
865 1 : assert(item->maybe == true);
866 :
// "ab": a fresh literal after the tainted one
867 1 : item = passgen_pattern_segment_get_item(segment, 4);
868 1 : assert(item);
869 1 : assert(item->kind == PASSGEN_PATTERN_LITERAL);
870 1 : assert(item->data.literal.count == 2);
871 1 : assert(item->data.literal.codepoints[0] == 'a');
872 1 : assert(item->data.literal.codepoints[1] == 'b');
873 1 : assert(item->data.literal.tainted == false);
874 1 : assert(item->repeat.min == 1);
875 1 : assert(item->repeat.max == 1);
876 1 : assert(item->maybe == false);
877 :
878 1 : POSTAMBLE();
879 :
880 1 : return test_ok;
881 : }
882 :
// The helper macros are only meant for the token-by-token tests above; the
// tests below call passgen_parse() on whole strings instead.
883 : #undef PREAMBLE
884 : #undef POSTAMBLE
885 : #undef PARSE_CODEPOINT
886 : #undef PARSE_CODEPOINT_DOUBLE
887 :
888 : /// Patterns that are known to be broken (result in a parse error).
/// The list is terminated by a NULL entry.
889 : const char *pattern_broken[] = {
890 : // closing groups that don't exist
891 : ")",
892 : ")))",
893 : "[a-z]))",
894 : // character ranges with end lower than start
895 : "[a-0]",
896 : "[z-a]",
897 : // character ranges with missing end
898 : "[a-",
899 : "[b-",
900 : "[b-]",
901 : "[za-",
902 : // unclosed groups
903 : "(",
904 : "(()",
905 : // unfinished escape sequence
906 : "\\",
907 : "\\u",
908 : "\\u{",
909 : "\\u{0a",
910 : // unicode literal payload too long
911 : "\\u{0000000000000",
912 : "\\u{123456789abcdef",
913 : // invalid utf8 sequences, taken from:
914 : // https://stackoverflow.com/questions/1301402/example-invalid-utf8-string
915 : // invalid 2-octet utf8
916 : "\xc3\x28",
917 : // invalid 3-octet utf8 (in second octet)
918 : "\xe2\x28\xa1",
919 : // invalid 3-octet utf8 (in third octet)
920 : "\xe2\x82\x28",
921 : // invalid 4-octet utf8 (in second octet)
922 : "\xf0\x28\x8c\xbc",
923 : // invalid 4-octet utf8 (in third octet)
924 : "\xf0\x90\x28\xbc",
925 : // invalid 4-octet utf8 (in fourth octet; note that the second octet of
// this sample, 0x28, is not a continuation byte either)
926 : "\xf0\x28\x8c\x28",
927 : // well-formed 5-octet sequence (but not valid Unicode!)
928 : "\xf8\xa1\xa1\xa1\xa1",
929 : // well-formed 6-octet sequence (but not valid Unicode!)
930 : "\xfc\xa1\xa1\xa1\xa1\xa1",
931 : NULL,
932 : };
933 :
934 : /// Make sure that the parser returns an error when parsing these known broken
935 : /// patterns.
/// Walks `pattern_broken` up to its NULL terminator and requires
/// passgen_parse() to return a non-zero value for every entry. The contents of
/// `error` are not inspected.
936 1 : test_result test_parser_parse_broken(void) {
937 26 : for(int i = 0; pattern_broken[i]; i++) {
938 : passgen_pattern pattern;
939 : passgen_error error;
940 25 : int ret = passgen_parse(&pattern, &error, pattern_broken[i]);
// the pattern is released even though parsing is expected to have failed,
// and before the assertion (presumably so that a failing assertion cannot
// skip the cleanup)
941 25 : passgen_pattern_free(&pattern);
942 25 : assert(ret != 0);
943 : }
944 :
945 1 : return test_ok;
946 : }
947 :
948 : /// Patterns that are known to be working.
/// The list is terminated by a NULL entry.
949 : const char *pattern_working[] = {
950 : // character literal (including the empty pattern)
951 : "",
952 : "a",
953 : "ab",
954 : "abc",
955 : "abcd",
956 : "abcde",
957 : "abcdef",
958 : "abcdefg",
959 : "abcdefgh",
960 : "abcdefghi",
961 : "abcdefghij",
962 : // character literal repetition
963 : "a{1}",
964 : "a{9}",
965 : "a{12}",
966 : "a{12,16}",
967 : // group
968 : "()",
969 : "(a)",
970 : "(a|b)",
971 : "(a|b|c)",
972 : "(a{2}|b|c)",
973 : "(a|b|c){2}",
974 : // character set and character range
975 : "[a]",
976 : "[abc]",
977 : "[a-z]",
978 : "[a-z0-9]",
979 : "[a-z0-9!@#$%^&*]",
980 : "[a-z]{2}",
981 : "[a-z]{2,8}",
982 : // unicode literal
983 : "\\u{0a}",
984 : "\\u{0A}",
985 : "\\u{fc}",
986 : "\\u{FC}",
987 : "\\u{00fc}",
988 : "\\u{00FC}",
989 : "\\u{10ffff}",
990 : // special items (\w, \m, \p) with arguments of increasing length
991 : "\\w{english}",
992 : "\\m{english}",
993 : "\\p{pattern}",
994 : "\\w{englishenglish}",
995 : "\\w{englishenglishenglishenglish}",
996 : "\\w{englishenglishenglishenglishenglishenglishenglish}",
997 : "\\w{veryveryveryveryveryveryveryveryveryveryveryveryveryveryvery}",
998 : // very long pattern (the four adjacent literals form a single string)
999 : "abababababababababababababababababababababababababababababababababababab"
1000 : "abababababababababababababababababababababababababababababababababababab"
1001 : "abababababababababababababababababababababababababababababababababababab"
1002 : "abababababababababababababababababababababababababababababababababababab",
1003 : NULL,
1004 : };
1005 :
1006 : /// Make sure that we can parse patterns that are known to be good.
1007 1 : test_result test_parser_parse_working(void) {
1008 44 : for(int i = 0; pattern_working[i]; i++) {
1009 : passgen_pattern pattern;
1010 : passgen_error error;
1011 43 : int ret = passgen_parse(&pattern, &error, pattern_working[i]);
1012 43 : assert(ret == 0);
1013 43 : passgen_pattern_free(&pattern);
1014 : }
1015 :
1016 1 : return test_ok;
1017 : }
1018 :
1019 : /// Make sure that we can parse random patterns. Some of these might be valid,
1020 : /// some might not. But none of these should be able to crash the parser in any
1021 : /// way.
1022 1 : test_result test_parser_parse_random_selected(void) {
1023 : // How many random patterns to generate.
1024 1 : size_t iterations = 10000;
1025 : // Characters to choose from. Must be zero-terminated for `strlen` to work
1026 : // on it.
1027 1 : const char characters[] = "()[]|{},.abcdefghijklmnopqrstuvw0123456789\\";
1028 : // Find out how many possible characters there are.
1029 1 : size_t characters_len = strlen(characters);
1030 : // Maximum length of the string to try parsing.
1031 1 : size_t string_length = 16;
1032 : // Storage for the string plus NULL-terminator.
1033 1 : char string[string_length + 1];
1034 : // Source of randomness.
1035 1 : passgen_random *random = passgen_random_open(NULL, NULL);
1036 :
1037 : // Generate random strings and parse them.
1038 10001 : for(size_t i = 0; i < iterations; i++) {
1039 : // Determine length of random string.
1040 10000 : size_t length = passgen_random_u8_max(random, string_length);
1041 : // NUL-terminate the string.
1042 10000 : string[length] = 0;
1043 : // Generate random characters.
1044 85810 : for(size_t c = 0; c < length; c++) {
1045 75810 : string[c] =
1046 75810 : characters[passgen_random_u8_max(random, characters_len)];
1047 : }
1048 :
1049 10000 : string[length] = 0;
1050 :
1051 : // Parse the string.
1052 : passgen_pattern pattern;
1053 : passgen_error error;
1054 10000 : passgen_parse(&pattern, &error, string);
1055 10000 : passgen_pattern_free(&pattern);
1056 : }
1057 :
1058 1 : passgen_random_free(random);
1059 :
1060 1 : return test_ok;
1061 : }
1062 :
1063 : /// Make sure that we can parse random patterns. Some of these might be valid,
1064 : /// some might not. But none of these should be able to crash the parser in any
1065 : /// way.
1066 1 : test_result test_parser_parse_random_ascii_printable(void) {
1067 1 : size_t iterations = 10000;
1068 1 : size_t string_length = 16;
1069 1 : char string[string_length + 1];
1070 1 : passgen_random *random = passgen_random_open(NULL, NULL);
1071 :
1072 10001 : for(size_t i = 0; i < iterations; i++) {
1073 170000 : for(size_t c = 0; c < string_length; c++) {
1074 160000 : string[c] = 33 + passgen_random_u8_max(random, 93);
1075 : }
1076 10000 : string[string_length] = 0;
1077 :
1078 : // Parse the string.
1079 : passgen_pattern pattern;
1080 : passgen_error error;
1081 10000 : passgen_parse(&pattern, &error, string);
1082 10000 : passgen_pattern_free(&pattern);
1083 : }
1084 :
1085 1 : passgen_random_free(random);
1086 :
1087 1 : return test_ok;
1088 : }
1089 :
1090 1 : test_result test_parser_parse_random_unicode(void) {
1091 1 : size_t iterations = 10000;
1092 1 : size_t string_length = 16;
1093 1 : uint32_t string[string_length];
1094 1 : passgen_random *random = passgen_random_open(NULL, NULL);
1095 :
1096 10001 : for(size_t i = 0; i < iterations; i++) {
1097 170000 : for(size_t c = 0; c < string_length; c++) {
1098 160000 : string[c] =
1099 160000 : 33 + passgen_random_u32_max(random, PASSGEN_UNICODE_MAX);
1100 : }
1101 :
1102 : // Parse the string.
1103 : passgen_parser parser;
1104 : passgen_pattern pattern;
1105 10000 : passgen_parser_init(&parser, &pattern);
1106 10000 : passgen_parser_unicode(&parser, string, string_length);
1107 10000 : passgen_pattern_free(parser.pattern);
1108 10000 : passgen_parser_free(&parser);
1109 : }
1110 :
1111 1 : passgen_random_free(random);
1112 :
1113 1 : return test_ok;
1114 : }
|