Coverage Report

Created: 2023-03-17 06:05

/builds/xfbs/passgen/src/tests/parser.c
Line
Count
Source
1
#include "passgen/parser/parser.h"
2
#include "passgen/container/stack.h"
3
#include "passgen/parser/token.h"
4
#include "passgen/pattern/group.h"
5
#include "passgen/pattern/literal.h"
6
#include "passgen/pattern/pattern.h"
7
#include "passgen/pattern/range.h"
8
#include "passgen/pattern/repeat.h"
9
#include "passgen/pattern/segment.h"
10
#include "passgen/pattern/segment_item.h"
11
#include "passgen/pattern/set.h"
12
#include "passgen/util/random.h"
13
#include "tests.h"
14
#include <passgen/passgen.h>
15
16
// Sets up the local state every parser test works with: the pattern that is
// parsed into, the parser writing to it, a zero-initialized token parser and
// token, and the `segment` / `item` cursors used to inspect the result.
// The cursors are cast to void, presumably to silence unused-variable
// warnings in tests that never touch them.
// This expands to declarations, so it must be used at function scope and
// cannot be wrapped in a block.
#define PREAMBLE()                                 \
    passgen_pattern parsed_pattern;                \
    passgen_parser parser;                         \
    passgen_token_parser token_parser = {0};       \
    passgen_token token = {0};                     \
    passgen_pattern_item *item;                    \
    passgen_pattern_segment *segment;              \
    passgen_parser_init(&parser, &parsed_pattern); \
    (void) item;                                   \
    (void) segment
26
27
// Ends a test: asserts that passgen_parse_finish() returns 0, then frees the
// parser and the parsed pattern declared by PREAMBLE().
// The body is wrapped in do/while(0) and carries no trailing semicolon, so
// that `POSTAMBLE();` is exactly one statement and stays correct inside an
// unbraced if/else.
#define POSTAMBLE()                                  \
    do {                                             \
        assert_eq(0, passgen_parse_finish(&parser)); \
        passgen_parser_free(&parser);                \
        passgen_pattern_free(&parsed_pattern);       \
    } while(0)
31
32
// Runs one codepoint through the token parser and hands the resulting token
// to the pattern parser. Asserts that passgen_token_parse() returns
// PASSGEN_TOKEN_INIT (presumably: a complete token was produced) and that
// passgen_parse_token() returns 0.
// Uses the `token_parser`, `token` and `parser` locals from PREAMBLE().
// Wrapped in do/while(0) so both assertions stay together when the macro is
// used as the body of an unbraced if/for.
#define PARSE_CODEPOINT(codepoint)                                        \
    do {                                                                  \
        assert(                                                           \
            passgen_token_parse(&token_parser, &token, 1, (codepoint)) == \
            PASSGEN_TOKEN_INIT);                                          \
        assert_eq(0, passgen_parse_token(&parser, &token));               \
    } while(0)
37
38
// Like PARSE_CODEPOINT, but for a token made of two codepoints (the tests use
// it for backslash escapes). The first codepoint must leave the token parser
// with a positive return value, the second must bring it back to
// PASSGEN_TOKEN_INIT; the finished token is then passed to the parser, which
// must return 0.
// Wrapped in do/while(0) so that the three assertions form one statement.
#define PARSE_CODEPOINT_DOUBLE(a, b)                                      \
    do {                                                                  \
        assert(passgen_token_parse(&token_parser, &token, 1, (a)) > 0);   \
        assert(                                                           \
            passgen_token_parse(&token_parser, &token, 1, (b)) ==         \
            PASSGEN_TOKEN_INIT);                                          \
        assert_eq(0, passgen_parse_token(&parser, &token));               \
    } while(0)
44
45
1
test_result test_parser_empty(void) {
46
1
    PREAMBLE();
47
1
    (void) item;
48
1
    (void) token;
49
1
    (void) token_parser;
50
1
51
1
    // single empty segment
52
1
    assert(1 == parser.pattern->group.segments.len);
53
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
54
1
    assert(0 == segment->items.len);
55
1
56
1
    POSTAMBLE();
57
1
58
1
    return test_ok;
59
1
}
60
61
1
test_result test_parser_segment_multiplier(void) {
62
1
    PREAMBLE();
63
1
    PARSE_CODEPOINT('(');
64
1
65
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
66
1
    item = passgen_pattern_segment_get_item(segment, 0);
67
1
68
1
    PARSE_CODEPOINT('{');
69
1
    PARSE_CODEPOINT('5');
70
1
    PARSE_CODEPOINT('}');
71
1
72
1
    assert_eq(item->data.group.segments.len, 1);
73
1
    assert_eq(item->data.group.multiplier_sum, 5);
74
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 0);
75
1
    assert_eq(segment->multiplier, 5);
76
1
77
1
    PARSE_CODEPOINT('a');
78
1
79
1
    assert_eq(segment->items.len, 1);
80
1
81
1
    PARSE_CODEPOINT('|');
82
1
83
1
    assert_eq(item->data.group.segments.len, 2);
84
1
    assert_eq(item->data.group.multiplier_sum, 6);
85
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 1);
86
1
    assert_eq(segment->multiplier, 1);
87
1
88
1
    PARSE_CODEPOINT('b');
89
1
    PARSE_CODEPOINT('c');
90
1
91
1
    // one item (two codepoints in a single literal)
92
1
    assert_eq(segment->items.len, 1);
93
1
94
1
    PARSE_CODEPOINT('|');
95
1
96
1
    assert_eq(item->data.group.segments.len, 3);
97
1
    assert_eq(item->data.group.multiplier_sum, 7);
98
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 2);
99
1
    assert_eq(segment->multiplier, 1);
100
1
101
1
    PARSE_CODEPOINT('{');
102
1
    PARSE_CODEPOINT('3');
103
1
    PARSE_CODEPOINT('}');
104
1
105
1
    assert_eq(item->data.group.multiplier_sum, 9);
106
1
    assert_eq(segment->multiplier, 3);
107
1
108
1
    PARSE_CODEPOINT('c');
109
1
110
1
    assert_eq(segment->items.len, 1);
111
1
112
1
    PARSE_CODEPOINT(')');
113
1
114
1
    assert_eq(item->data.group.multiplier_sum, 9);
115
1
116
1
    POSTAMBLE();
117
1
118
1
    return test_ok;
119
1
}
120
121
// when parsing a group, skip over any segments that have a zero multiplier.
122
1
test_result test_parser_skip_zero_segment(void) {
123
1
    PREAMBLE();
124
1
    PARSE_CODEPOINT('(');
125
1
126
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
127
1
    item = passgen_pattern_segment_get_item(segment, 0);
128
1
    assert_eq(item->data.group.segments.len, 1);
129
1
130
101
    for(size_t i = 0; i < 100; 
i++100
) {
131
100
        PARSE_CODEPOINT('{');
132
100
        PARSE_CODEPOINT('0');
133
100
        PARSE_CODEPOINT('}');
134
100
        PARSE_CODEPOINT('|');
135
100
136
100
        // because the multiplier is zero, this segment is removed
137
100
        assert_eq(item->data.group.segments.len, 1);
138
100
    }
139
1
140
2
    
PARSE_CODEPOINT1
('{');
141
1
    PARSE_CODEPOINT('0');
142
1
    PARSE_CODEPOINT('}');
143
1
    PARSE_CODEPOINT(')');
144
1
145
1
    // final segment is also removed, leaving no segments
146
1
    assert_eq(item->data.group.segments.len, 0);
147
1
148
1
    POSTAMBLE();
149
1
    return test_ok;
150
1
}
151
152
1
test_result test_parser_empty_group(void) {
153
1
    PREAMBLE();
154
1
    PARSE_CODEPOINT('(');
155
1
156
1
    assert(1 == parser.pattern->group.segments.len);
157
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
158
1
    assert(segment);
159
1
    assert(1 == segment->items.len);
160
1
161
1
    // group with one empty segment
162
1
    item = passgen_pattern_segment_get_item(segment, 0);
163
1
    assert(item);
164
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
165
1
    assert_eq(item->data.group.segments.len, 1);
166
1
167
1
    PARSE_CODEPOINT(')');
168
1
169
1
    assert(1 == parser.pattern->group.segments.len);
170
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
171
1
    assert(segment);
172
1
    assert(1 == segment->items.len);
173
1
174
101
    for(size_t i = 0; i < 100; 
i++100
) {
175
100
        PARSE_CODEPOINT('(');
176
100
        PARSE_CODEPOINT(')');
177
100
178
100
        assert(1 == parser.pattern->group.segments.len);
179
100
        segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
180
100
        assert(segment);
181
100
        assert(1 == segment->items.len);
182
100
    }
183
1
184
1
    POSTAMBLE();
185
1
    return test_ok;
186
1
}
187
188
1
test_result test_parser_single_char(void) {
189
1
    PREAMBLE();
190
1
    PARSE_CODEPOINT('a');
191
1
192
1
    // single segment containing char 'a'
193
1
    assert(1 == parser.pattern->group.segments.len);
194
1
195
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
196
1
    assert(segment);
197
1
    assert(1 == segment->items.len);
198
1
199
1
    item = passgen_pattern_segment_get_item(segment, 0);
200
1
    assert(item);
201
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
202
1
    assert(item->data.literal.codepoints[0] == 'a');
203
1
    assert(item->data.literal.count == 1);
204
1
205
1
    POSTAMBLE();
206
1
207
1
    return test_ok;
208
1
}
209
210
1
test_result test_parser_multi_char(void) {
211
1
    PREAMBLE();
212
1
    PARSE_CODEPOINT('a');
213
1
    PARSE_CODEPOINT('b');
214
1
215
1
    // single segment containing char 'a'
216
1
    assert(1 == parser.pattern->group.segments.len);
217
1
218
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
219
1
    assert(segment);
220
1
    assert(1 == segment->items.len);
221
1
222
1
    item = passgen_pattern_segment_get_item(segment, 0);
223
1
    assert(item);
224
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
225
1
    assert(item->data.literal.codepoints[0] == 'a');
226
1
    assert(item->data.literal.codepoints[1] == 'b');
227
1
    assert(item->data.literal.count == 2);
228
1
229
1
    POSTAMBLE();
230
1
231
1
    return test_ok;
232
1
}
233
234
1
test_result test_parser_multi_groups(void) {
235
1
    PREAMBLE();
236
1
    PARSE_CODEPOINT('a');
237
1
    PARSE_CODEPOINT('|');
238
1
    PARSE_CODEPOINT('b');
239
1
240
1
    assert(2 == parser.pattern->group.segments.len);
241
1
242
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
243
1
    assert(1 == segment->items.len);
244
1
245
1
    item = passgen_pattern_segment_get_item(segment, 0);
246
1
    assert(item);
247
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
248
1
    assert(item->data.literal.codepoints[0] == 'a');
249
1
    assert(item->data.literal.count == 1);
250
1
251
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 1);
252
1
    assert(1 == segment->items.len);
253
1
254
1
    item = passgen_pattern_segment_get_item(segment, 0);
255
1
    assert(item);
256
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
257
1
    assert(item->data.literal.codepoints[0] == 'b');
258
1
    assert(item->data.literal.count == 1);
259
1
260
1
    POSTAMBLE();
261
1
262
1
    return test_ok;
263
1
}
264
265
1
test_result test_parser_nested_groups(void) {
266
1
    PREAMBLE();
267
1
    PARSE_CODEPOINT('(');
268
1
    PARSE_CODEPOINT('a');
269
1
    PARSE_CODEPOINT(')');
270
1
271
1
    assert(1 == parser.pattern->group.segments.len);
272
1
273
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
274
1
    assert(1 == segment->items.len);
275
1
276
1
    item = passgen_pattern_segment_get_item(segment, 0);
277
1
    assert(item);
278
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
279
1
280
1
    assert(1 == item->data.group.segments.len);
281
1
282
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 0);
283
1
    assert(1 == segment->items.len);
284
1
285
1
    item = passgen_pattern_segment_get_item(segment, 0);
286
1
    assert(item);
287
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
288
1
    assert(item->data.literal.codepoints[0] == 'a');
289
1
    assert(item->data.literal.count == 1);
290
1
    assert(item->repeat.min == 1);
291
1
    assert(item->repeat.max == 1);
292
1
293
1
    POSTAMBLE();
294
1
295
1
    return test_ok;
296
1
}
297
298
1
test_result test_parser_depth_limit(void) {
299
1
    PREAMBLE();
300
1
    parser.limit = 5;
301
1
    PARSE_CODEPOINT('(');
302
1
    PARSE_CODEPOINT('(');
303
1
    PARSE_CODEPOINT('(');
304
1
    PARSE_CODEPOINT('(');
305
1
    assert(0 != passgen_parse_token(&parser, &token));
306
1
307
1
    assert(0 != passgen_parse_finish(&parser));
308
1
    passgen_parser_free(&parser);
309
1
    passgen_pattern_free(&parsed_pattern);
310
1
311
1
    return test_ok;
312
1
}
313
314
1
test_result test_parser_multi_nested_groups(void) {
315
1
    PREAMBLE();
316
1
    PARSE_CODEPOINT('(');
317
1
    PARSE_CODEPOINT('a');
318
1
    PARSE_CODEPOINT(')');
319
1
    PARSE_CODEPOINT('(');
320
1
    PARSE_CODEPOINT('b');
321
1
    PARSE_CODEPOINT(')');
322
1
323
1
    assert(1 == parser.pattern->group.segments.len);
324
1
325
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
326
1
    assert(2 == segment->items.len);
327
1
328
1
    item = passgen_pattern_segment_get_item(segment, 0);
329
1
    assert(item);
330
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
331
1
332
1
    assert(1 == item->data.group.segments.len);
333
1
334
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 0);
335
1
    assert(1 == segment->items.len);
336
1
337
1
    item = passgen_pattern_segment_get_item(segment, 0);
338
1
    assert(item);
339
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
340
1
    assert(item->data.literal.codepoints[0] == 'a');
341
1
    assert(item->data.literal.count == 1);
342
1
343
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
344
1
    item = passgen_pattern_segment_get_item(segment, 1);
345
1
    assert(item);
346
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
347
1
348
1
    assert(1 == item->data.group.segments.len);
349
1
350
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 0);
351
1
    assert(1 == segment->items.len);
352
1
353
1
    item = passgen_pattern_segment_get_item(segment, 0);
354
1
    assert(item);
355
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
356
1
    assert(item->data.literal.codepoints[0] == 'b');
357
1
    assert(item->data.literal.count == 1);
358
1
359
1
    POSTAMBLE();
360
1
361
1
    return test_ok;
362
1
}
363
364
1
test_result test_parser_set_simple(void) {
365
1
    PREAMBLE();
366
1
    PARSE_CODEPOINT('[');
367
1
    PARSE_CODEPOINT('a');
368
1
    PARSE_CODEPOINT('b');
369
1
    PARSE_CODEPOINT(']');
370
1
    passgen_pattern_range *range;
371
1
372
1
    // single segment containing char 'a'
373
1
    assert(1 == parser.pattern->group.segments.len);
374
1
375
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
376
1
    assert(segment);
377
1
    assert(1 == segment->items.len);
378
1
379
1
    item = passgen_pattern_segment_get_item(segment, 0);
380
1
    assert(item);
381
1
    assert(item->kind == PASSGEN_PATTERN_SET);
382
1
383
1
    assert(item->data.set.items.len == 2);
384
1
385
1
    range = passgen_pattern_set_range_get(&item->data.set, 0);
386
1
    assert(range);
387
1
    assert(range->start == 'a');
388
1
    assert(range->end == 'a');
389
1
390
1
    range = passgen_pattern_set_range_get(&item->data.set, 1);
391
1
    assert(range);
392
1
    assert(range->start == 'b');
393
1
    assert(range->end == 'b');
394
1
395
1
    POSTAMBLE();
396
1
397
1
    return test_ok;
398
1
}
399
400
1
test_result test_parser_set_simple_escaped(void) {
401
1
    PREAMBLE();
402
1
    PARSE_CODEPOINT('[');
403
1
    PARSE_CODEPOINT('a');
404
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '-');
405
1
    PARSE_CODEPOINT('b');
406
1
    PARSE_CODEPOINT(']');
407
1
    passgen_pattern_range *range;
408
1
409
1
    // single segment containing char 'a'
410
1
    assert(1 == parser.pattern->group.segments.len);
411
1
412
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
413
1
    assert(segment);
414
1
    assert(1 == segment->items.len);
415
1
416
1
    item = passgen_pattern_segment_get_item(segment, 0);
417
1
    assert(item);
418
1
    assert(item->kind == PASSGEN_PATTERN_SET);
419
1
420
1
    assert(item->data.set.items.len == 3);
421
1
422
1
    range = passgen_pattern_set_range_get(&item->data.set, 0);
423
1
    assert(range);
424
1
    assert(range->start == 'a');
425
1
    assert(range->end == 'a');
426
1
427
1
    range = passgen_pattern_set_range_get(&item->data.set, 1);
428
1
    assert(range);
429
1
    assert(range->start == '-');
430
1
    assert(range->end == '-');
431
1
432
1
    range = passgen_pattern_set_range_get(&item->data.set, 2);
433
1
    assert(range);
434
1
    assert(range->start == 'b');
435
1
    assert(range->end == 'b');
436
1
437
1
    POSTAMBLE();
438
1
439
1
    return test_ok;
440
1
}
441
442
1
test_result test_parser_range_simple(void) {
443
1
    PREAMBLE();
444
1
    PARSE_CODEPOINT('[');
445
1
    PARSE_CODEPOINT('a');
446
1
    PARSE_CODEPOINT('-');
447
1
    PARSE_CODEPOINT('b');
448
1
    PARSE_CODEPOINT(']');
449
1
    passgen_pattern_range *range;
450
1
451
1
    // single segment containing char 'a'
452
1
    assert(1 == parser.pattern->group.segments.len);
453
1
454
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
455
1
    assert(segment);
456
1
    assert(1 == segment->items.len);
457
1
458
1
    item = passgen_pattern_segment_get_item(segment, 0);
459
1
    assert(item);
460
1
    assert(item->kind == PASSGEN_PATTERN_SET);
461
1
462
1
    assert(item->data.set.items.len == 1);
463
1
464
1
    range = passgen_pattern_set_range_get(&item->data.set, 0);
465
1
    assert(range);
466
1
    assert(range->start == 'a');
467
1
    assert(range->end == 'b');
468
1
469
1
    POSTAMBLE();
470
1
471
1
    return test_ok;
472
1
}
473
474
1
test_result test_parser_range_multiple(void) {
475
1
    PREAMBLE();
476
1
    PARSE_CODEPOINT('[');
477
1
    PARSE_CODEPOINT('a');
478
1
    PARSE_CODEPOINT('-');
479
1
    PARSE_CODEPOINT('b');
480
1
    PARSE_CODEPOINT('c');
481
1
    PARSE_CODEPOINT('-');
482
1
    PARSE_CODEPOINT('d');
483
1
    PARSE_CODEPOINT(']');
484
1
    passgen_pattern_range *range;
485
1
486
1
    // single segment containing char 'a'
487
1
    assert(1 == parser.pattern->group.segments.len);
488
1
489
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
490
1
    assert(segment);
491
1
    assert(1 == segment->items.len);
492
1
493
1
    item = passgen_pattern_segment_get_item(segment, 0);
494
1
    assert(item);
495
1
    assert(item->kind == PASSGEN_PATTERN_SET);
496
1
497
1
    assert(item->data.set.items.len == 2);
498
1
499
1
    range = passgen_pattern_set_range_get(&item->data.set, 0);
500
1
    assert(range);
501
1
    assert(range->start == 'a');
502
1
    assert(range->end == 'b');
503
1
504
1
    range = passgen_pattern_set_range_get(&item->data.set, 1);
505
1
    assert(range);
506
1
    assert(range->start == 'c');
507
1
    assert(range->end == 'd');
508
1
509
1
    POSTAMBLE();
510
1
511
1
    return test_ok;
512
1
}
513
514
1
test_result test_parser_char_repeat(void) {
515
1
    PREAMBLE();
516
1
    PARSE_CODEPOINT('a');
517
1
    PARSE_CODEPOINT('{');
518
1
    PARSE_CODEPOINT('2');
519
1
    PARSE_CODEPOINT('}');
520
1
521
1
    // single segment containing char 'a'
522
1
    assert(1 == parser.pattern->group.segments.len);
523
1
524
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
525
1
    assert(segment);
526
1
    assert(1 == segment->items.len);
527
1
528
1
    item = passgen_pattern_segment_get_item(segment, 0);
529
1
    assert(item);
530
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
531
1
    assert(item->data.literal.codepoints[0] == 'a');
532
1
    assert(item->data.literal.count == 1);
533
1
    assert(item->repeat.min == 2);
534
1
    assert(item->repeat.max == 2);
535
1
536
1
    passgen_parser_free(&parser);
537
1
    passgen_pattern_free(&parsed_pattern);
538
1
539
1
    return test_ok;
540
1
}
541
542
1
test_result test_parser_char_repeat_range(void) {
543
1
    PREAMBLE();
544
1
    PARSE_CODEPOINT('a');
545
1
    PARSE_CODEPOINT('{');
546
1
    PARSE_CODEPOINT('2');
547
1
    PARSE_CODEPOINT(',');
548
1
    PARSE_CODEPOINT('4');
549
1
    PARSE_CODEPOINT('}');
550
1
551
1
    // single segment containing char 'a'
552
1
    assert(1 == parser.pattern->group.segments.len);
553
1
554
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
555
1
    assert(segment);
556
1
    assert(1 == segment->items.len);
557
1
558
1
    item = passgen_pattern_segment_get_item(segment, 0);
559
1
    assert(item);
560
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
561
1
    assert(item->data.literal.codepoints[0] == 'a');
562
1
    assert(item->data.literal.count == 1);
563
1
    assert(item->repeat.min == 2);
564
1
    assert(item->repeat.max == 4);
565
1
566
1
    POSTAMBLE();
567
1
568
1
    return test_ok;
569
1
}
570
571
1
test_result test_parser_group_ignore_escaped(void) {
572
1
    PREAMBLE();
573
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '(');
574
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '{');
575
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '[');
576
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '|');
577
1
578
1
    assert(1 == parser.pattern->group.segments.len);
579
1
580
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
581
1
    assert(segment);
582
1
    assert(1 == segment->items.len);
583
1
584
1
    item = passgen_pattern_segment_get_item(segment, 0);
585
1
    assert(item);
586
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
587
1
    assert(item->data.literal.codepoints[0] == '(');
588
1
    assert(item->data.literal.codepoints[1] == '{');
589
1
    assert(item->data.literal.codepoints[2] == '[');
590
1
    assert(item->data.literal.codepoints[3] == '|');
591
1
    assert(item->data.literal.count == 4);
592
1
    assert(item->repeat.min == 1);
593
1
    assert(item->repeat.max == 1);
594
1
595
1
    POSTAMBLE();
596
1
597
1
    return test_ok;
598
1
}
599
600
1
test_result test_parser_item_maybe(void) {
601
1
    PREAMBLE();
602
1
    PARSE_CODEPOINT('a');
603
1
    PARSE_CODEPOINT('a');
604
1
    PARSE_CODEPOINT('?');
605
1
    PARSE_CODEPOINT('(');
606
1
    PARSE_CODEPOINT('a');
607
1
    PARSE_CODEPOINT(')');
608
1
    PARSE_CODEPOINT('(');
609
1
    PARSE_CODEPOINT('b');
610
1
    PARSE_CODEPOINT(')');
611
1
    PARSE_CODEPOINT('?');
612
1
613
1
    // single segment containing char 'a'
614
1
    assert(1 == parser.pattern->group.segments.len);
615
1
616
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
617
1
    assert(segment);
618
1
    assert(4 == segment->items.len);
619
1
620
1
    item = passgen_pattern_segment_get_item(segment, 0);
621
1
    assert(item);
622
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
623
1
    assert(item->data.literal.codepoints[0] == 'a');
624
1
    assert(item->data.literal.count == 1);
625
1
    assert(item->repeat.min == 1);
626
1
    assert(item->repeat.max == 1);
627
1
    assert(item->maybe == false);
628
1
629
1
    item = passgen_pattern_segment_get_item(segment, 1);
630
1
    assert(item);
631
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
632
1
    assert(item->data.literal.codepoints[0] == 'a');
633
1
    assert(item->data.literal.count == 1);
634
1
    assert(item->repeat.min == 1);
635
1
    assert(item->repeat.max == 1);
636
1
    assert(item->maybe == true);
637
1
638
1
    item = passgen_pattern_segment_get_item(segment, 2);
639
1
    assert(item);
640
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
641
1
    assert(item->repeat.min == 1);
642
1
    assert(item->repeat.max == 1);
643
1
    assert(item->maybe == false);
644
1
645
1
    item = passgen_pattern_segment_get_item(segment, 3);
646
1
    assert(item);
647
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
648
1
    assert(item->repeat.min == 1);
649
1
    assert(item->repeat.max == 1);
650
1
    assert(item->maybe == true);
651
1
652
1
    POSTAMBLE();
653
1
654
1
    return test_ok;
655
1
}
656
657
1
test_result test_parser_special_pronounceable(void) {
658
1
    PREAMBLE();
659
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', 'm');
660
1
    PARSE_CODEPOINT('{');
661
1
    PARSE_CODEPOINT('e');
662
1
    PARSE_CODEPOINT('n');
663
1
    PARSE_CODEPOINT('g');
664
1
    PARSE_CODEPOINT('l');
665
1
    PARSE_CODEPOINT('i');
666
1
    PARSE_CODEPOINT('s');
667
1
    PARSE_CODEPOINT('h');
668
1
    PARSE_CODEPOINT('}');
669
1
670
1
    assert(parser.state.len == 1);
671
1
672
1
    assert(1 == parser.pattern->group.segments.len);
673
1
674
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
675
1
    assert(segment);
676
1
    assert(1 == segment->items.len);
677
1
678
1
    item = passgen_pattern_segment_get_item(segment, 0);
679
1
    assert(item);
680
1
    assert(item->kind == PASSGEN_PATTERN_SPECIAL);
681
1
    assert(item->repeat.min == 1);
682
1
    assert(item->repeat.max == 1);
683
1
    assert(item->maybe == false);
684
1
    assert(item->data.special.kind == PASSGEN_PATTERN_SPECIAL_MARKOV);
685
1
686
1
    POSTAMBLE();
687
1
688
1
    return test_ok;
689
1
}
690
691
1
test_result test_parser_mixed_special(void) {
692
1
    PREAMBLE();
693
1
    PARSE_CODEPOINT('a');
694
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', 'm');
695
1
    PARSE_CODEPOINT('{');
696
1
    PARSE_CODEPOINT('e');
697
1
    PARSE_CODEPOINT('n');
698
1
    PARSE_CODEPOINT('g');
699
1
    PARSE_CODEPOINT('l');
700
1
    PARSE_CODEPOINT('i');
701
1
    PARSE_CODEPOINT('s');
702
1
    PARSE_CODEPOINT('h');
703
1
    PARSE_CODEPOINT('}');
704
1
705
1
    assert(parser.state.len == 1);
706
1
707
1
    assert(1 == parser.pattern->group.segments.len);
708
1
709
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
710
1
    assert(segment);
711
1
    assert(2 == segment->items.len);
712
1
713
1
    item = passgen_pattern_segment_get_item(segment, 0);
714
1
    assert(item);
715
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
716
1
    assert(item->data.literal.codepoints[0] == 'a');
717
1
    assert(item->data.literal.count == 1);
718
1
    assert(item->repeat.min == 1);
719
1
    assert(item->repeat.max == 1);
720
1
    assert(item->maybe == false);
721
1
722
1
    item = passgen_pattern_segment_get_item(segment, 1);
723
1
    assert(item);
724
1
    assert(item->kind == PASSGEN_PATTERN_SPECIAL);
725
1
    assert(item->repeat.min == 1);
726
1
    assert(item->repeat.max == 1);
727
1
    assert(item->maybe == false);
728
1
    assert(item->data.special.kind == PASSGEN_PATTERN_SPECIAL_MARKOV);
729
1
730
1
    POSTAMBLE();
731
1
732
1
    return test_ok;
733
1
}
734
735
1
test_result test_parser_char_maybe_char(void) {
736
1
    PREAMBLE();
737
1
    PARSE_CODEPOINT('a');
738
1
    PARSE_CODEPOINT('b');
739
1
    PARSE_CODEPOINT('c');
740
1
    PARSE_CODEPOINT('?');
741
1
    PARSE_CODEPOINT('d');
742
1
743
1
    assert(parser.state.len == 1);
744
1
745
1
    assert(1 == parser.pattern->group.segments.len);
746
1
747
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
748
1
    assert(segment);
749
1
    assert(3 == segment->items.len);
750
1
751
1
    item = passgen_pattern_segment_get_item(segment, 0);
752
1
    assert(item);
753
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
754
1
    assert(item->data.literal.count == 2);
755
1
    assert(item->data.literal.codepoints[0] == 'a');
756
1
    assert(item->data.literal.codepoints[1] == 'b');
757
1
    assert(item->data.literal.tainted == false);
758
1
    assert(item->repeat.min == 1);
759
1
    assert(item->repeat.max == 1);
760
1
    assert(item->maybe == false);
761
1
762
1
    item = passgen_pattern_segment_get_item(segment, 1);
763
1
    assert(item);
764
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
765
1
    assert(item->data.literal.count == 1);
766
1
    assert(item->data.literal.codepoints[0] == 'c');
767
1
    assert(item->data.literal.tainted == true);
768
1
    assert(item->repeat.min == 1);
769
1
    assert(item->repeat.max == 1);
770
1
    assert(item->maybe == true);
771
1
772
1
    item = passgen_pattern_segment_get_item(segment, 2);
773
1
    assert(item);
774
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
775
1
    assert(item->data.literal.count == 1);
776
1
    assert(item->data.literal.codepoints[0] == 'd');
777
1
    assert(item->data.literal.tainted == false);
778
1
    assert(item->repeat.min == 1);
779
1
    assert(item->repeat.max == 1);
780
1
    assert(item->maybe == false);
781
1
782
1
    POSTAMBLE();
783
1
784
1
    return test_ok;
785
1
}
786
787
1
test_result test_parser_char_repeat_tainted(void) {
788
1
    PREAMBLE();
789
1
    PARSE_CODEPOINT('a');
790
1
    PARSE_CODEPOINT('{');
791
1
    PARSE_CODEPOINT('3');
792
1
    PARSE_CODEPOINT('}');
793
1
    PARSE_CODEPOINT('b');
794
1
    PARSE_CODEPOINT('c');
795
1
    PARSE_CODEPOINT('d');
796
1
    PARSE_CODEPOINT('e');
797
1
    PARSE_CODEPOINT('f');
798
1
    PARSE_CODEPOINT('g');
799
1
    PARSE_CODEPOINT('h');
800
1
    PARSE_CODEPOINT('i');
801
1
    PARSE_CODEPOINT('j');
802
1
    PARSE_CODEPOINT('k');
803
1
    PARSE_CODEPOINT('{');
804
1
    PARSE_CODEPOINT('2');
805
1
    PARSE_CODEPOINT('}');
806
1
    PARSE_CODEPOINT('?');
807
1
    PARSE_CODEPOINT('a');
808
1
    PARSE_CODEPOINT('b');
809
1
810
1
    assert(parser.state.len == 1);
811
1
812
1
    assert(1 == parser.pattern->group.segments.len);
813
1
814
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
815
1
    assert(segment);
816
1
    assert(5 == segment->items.len);
817
1
818
1
    item = passgen_pattern_segment_get_item(segment, 0);
819
1
    assert(item);
820
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
821
1
    assert(item->data.literal.count == 1);
822
1
    assert(item->data.literal.codepoints[0] == 'a');
823
1
    assert(item->data.literal.tainted == true);
824
1
    assert(item->repeat.min == 3);
825
1
    assert(item->repeat.max == 3);
826
1
    assert(item->maybe == false);
827
1
828
1
    item = passgen_pattern_segment_get_item(segment, 1);
829
1
    assert(item);
830
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
831
1
    assert(item->data.literal.count == 7);
832
1
    assert(item->data.literal.codepoints[0] == 'b');
833
1
    assert(item->data.literal.codepoints[1] == 'c');
834
1
    assert(item->data.literal.codepoints[2] == 'd');
835
1
    assert(item->data.literal.codepoints[3] == 'e');
836
1
    assert(item->data.literal.codepoints[4] == 'f');
837
1
    assert(item->data.literal.codepoints[5] == 'g');
838
1
    assert(item->data.literal.codepoints[6] == 'h');
839
1
    assert(item->data.literal.tainted == false);
840
1
    assert(item->repeat.min == 1);
841
1
    assert(item->repeat.max == 1);
842
1
    assert(item->maybe == false);
843
1
844
1
    item = passgen_pattern_segment_get_item(segment, 2);
845
1
    assert(item);
846
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
847
1
    assert(item->data.literal.count == 2);
848
1
    assert(item->data.literal.codepoints[0] == 'i');
849
1
    assert(item->data.literal.codepoints[1] == 'j');
850
1
    assert(item->data.literal.tainted == false);
851
1
    assert(item->repeat.min == 1);
852
1
    assert(item->repeat.max == 1);
853
1
    assert(item->maybe == false);
854
1
855
1
    item = passgen_pattern_segment_get_item(segment, 3);
856
1
    assert(item);
857
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
858
1
    assert(item->data.literal.count == 1);
859
1
    assert(item->data.literal.codepoints[0] == 'k');
860
1
    assert(item->data.literal.tainted == true);
861
1
    assert(item->repeat.min == 2);
862
1
    assert(item->repeat.max == 2);
863
1
    assert(item->maybe == true);
864
1
865
1
    item = passgen_pattern_segment_get_item(segment, 4);
866
1
    assert(item);
867
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
868
1
    assert(item->data.literal.count == 2);
869
1
    assert(item->data.literal.codepoints[0] == 'a');
870
1
    assert(item->data.literal.codepoints[1] == 'b');
871
1
    assert(item->data.literal.tainted == false);
872
1
    assert(item->repeat.min == 1);
873
1
    assert(item->repeat.max == 1);
874
1
    assert(item->maybe == false);
875
1
876
1
    POSTAMBLE();
877
1
878
1
    return test_ok;
879
1
}
880
881
#undef PREAMBLE
882
#undef POSTAMBLE
883
#undef PARSE_CODEPOINT
884
#undef PARSE_CODEPOINT_DOUBLE
885
886
/// Patterns that are known to be broken (result in a parse error).
/// The list is terminated by a NULL entry, which is what
/// test_parser_can_parse_broken() uses to stop iterating.
887
const char *pattern_broken[] = {
888
    // closing groups that don't exist
889
    ")",
890
    ")))",
891
    "[a-z]))",
892
    // ranges with end lower than start
893
    "[a-0]",
894
    "[z-a]",
895
    // ranges with missing end
896
    "[a-",
897
    "[b-",
898
    "[b-]",
899
    "[za-",
900
    // unclosed groups
901
    "(",
902
    "(()",
903
    // unfinished escape sequence
904
    "\\",
905
    "\\u",
906
    "\\u{",
907
    "\\u{0a",
908
    // unicode literal payload too long
909
    "\\u{0000000000000",
910
    "\\u{123456789abcdef",
911
    // invalid utf8 sequences, taken from:
912
    // https://stackoverflow.com/questions/1301402/example-invalid-utf8-string
913
    // invalid 2-octet utf8
914
    "\xc3\x28",
915
    // invalid 3-octet utf8 (in second octet)
916
    "\xe2\x28\xa1",
917
    // invalid 3-octet utf8 (in third octet)
918
    "\xe2\x82\x28",
919
    // invalid 4-octet utf8 (in second octet)
920
    "\xf0\x28\x8c\xbc",
921
    // invalid 4-octet utf8 (in third octet)
922
    "\xf0\x90\x28\xbc",
923
    // invalid 4-octet utf8 (in fourth octet)
924
    "\xf0\x28\x8c\x28",
925
    // valid 5-octet sequence (but not Unicode!)
926
    "\xf8\xa1\xa1\xa1\xa1",
927
    // valid 6-octet sequence (but not Unicode!)
928
    "\xfc\xa1\xa1\xa1\xa1\xa1",
929
    // sentinel marking the end of the list
    NULL,
930
};
931
932
/// Make sure that the parser returns an error when parsing these known broken
933
/// patterns.
934
1
test_result test_parser_can_parse_broken(void) {
935
26
    for(int i = 0; pattern_broken[i]; 
i++25
) {
936
25
        passgen_pattern pattern;
937
25
        passgen_error error;
938
25
        int ret = passgen_parse(&pattern, &error, pattern_broken[i]);
939
25
        passgen_pattern_free(&pattern);
940
25
        assert(ret != 0);
941
25
    }
942
1
943
1
    return test_ok;
944
1
}
945
946
/// Patterns that the parser is expected to accept.
///
/// The final `NULL` entry marks the end of the list.
const char *pattern_working[] = {
    // literals of growing length, starting with the empty pattern
    "", "a", "ab", "abc", "abcd", "abcde",
    "abcdef", "abcdefg", "abcdefgh", "abcdefghi", "abcdefghij",

    // literal followed by a repetition count or range
    "a{1}", "a{9}", "a{12}", "a{12,16}",

    // groups: empty, single, alternatives, inner and outer repetition
    "()", "(a)", "(a|b)", "(a|b|c)", "(a{2}|b|c)", "(a|b|c){2}",

    // character sets and ranges, optionally repeated
    "[a]", "[abc]", "[a-z]", "[a-z0-9]", "[a-z0-9!@#$%^&*]",
    "[a-z]{2}", "[a-z]{2,8}",

    // unicode escapes, lower and upper case hex, up to the largest codepoint
    "\\u{0a}", "\\u{0A}", "\\u{fc}", "\\u{FC}",
    "\\u{00fc}", "\\u{00FC}", "\\u{10ffff}",

    // special escapes that take a braced name, including long names
    "\\w{english}",
    "\\m{english}",
    "\\p{pattern}",
    "\\w{englishenglish}",
    "\\w{englishenglishenglishenglish}",
    "\\w{englishenglishenglishenglishenglishenglishenglish}",
    "\\w{veryveryveryveryveryveryveryveryveryveryveryveryveryveryvery}",

    // one very long literal, split over several lines
    "abababababababababababababababababababababababababababababababababababab"
    "abababababababababababababababababababababababababababababababababababab"
    "abababababababababababababababababababababababababababababababababababab"
    "abababababababababababababababababababababababababababababababababababab",

    NULL,
};
1003
1004
/// Make sure that we can parse patterns that are known to be good.
1005
1
test_result test_parser_can_parse_working(void) {
1006
44
    for(int i = 0; pattern_working[i]; 
i++43
) {
1007
43
        passgen_pattern pattern;
1008
43
        passgen_error error;
1009
43
        int ret = passgen_parse(&pattern, &error, pattern_working[i]);
1010
43
        assert(ret == 0);
1011
43
        passgen_pattern_free(&pattern);
1012
43
    }
1013
1
1014
1
    return test_ok;
1015
1
}
1016
1017
/// Make sure that we can parse random patterns. Some of these might be valid,
1018
/// some might not. But none of these should be able to crash the parser in any
1019
/// way.
1020
1
test_result test_parser_can_parse_random(void) {
1021
1
    // How many random patterns to generate.
1022
1
    size_t iterations = 10000;
1023
1
    // Characters to choose from. Must be zero-terminated for `strlen` to work
1024
1
    // on it.
1025
1
    const char characters[] = {
1026
1
        '(',
1027
1
        ')',
1028
1
        '[',
1029
1
        ']',
1030
1
        '|',
1031
1
        '{',
1032
1
        '}',
1033
1
        ',',
1034
1
        'a',
1035
1
        'w',
1036
1
        'm',
1037
1
        'p',
1038
1
        'z',
1039
1
        '0',
1040
1
        '9',
1041
1
        '\\',
1042
1
        0};
1043
1
    // Find out how many possible characters there are.
1044
1
    size_t characters_len = strlen(characters);
1045
1
    // Maximum length of the string to try parsing.
1046
1
    size_t string_length = 16;
1047
1
    // Storage for the string plus NULL-terminator.
1048
1
    char string[string_length + 1];
1049
1
    // Source of randomness.
1050
1
    passgen_random *random = passgen_random_new(NULL);
1051
1
1052
1
    // Generate random strings and parse them.
1053
10.0k
    for(size_t i = 0; i < iterations; 
i++10.0k
) {
1054
10.0k
        // Determine length of random string.
1055
10.0k
        size_t length = passgen_random_u8_max(random, string_length);
1056
10.0k
        // NUL-terminate the string.
1057
10.0k
        string[length] = 0;
1058
10.0k
        // Generate random characters.
1059
84.6k
        for(size_t c = 0; c < length; 
c++74.6k
) {
1060
74.6k
            string[c] =
1061
74.6k
                characters[passgen_random_u8_max(random, characters_len)];
1062
74.6k
        }
1063
10.0k
1064
10.0k
        // Parse the string.
1065
10.0k
        passgen_pattern pattern;
1066
10.0k
        passgen_error error;
1067
10.0k
        passgen_parse(&pattern, &error, string);
1068
10.0k
        passgen_pattern_free(&pattern);
1069
10.0k
    }
1070
1
1071
1
    passgen_random_free(random);
1072
1
1073
1
    return test_ok;
1074
1
}