Coverage Report

Created: 2024-03-22 06:04

/builds/xfbs/passgen/src/tests/parser.c
Line
Count
Source
1
#include "passgen/parser/parser.h"
2
#include "passgen/container/stack.h"
3
#include "passgen/parser/token.h"
4
#include "passgen/pattern/group.h"
5
#include "passgen/pattern/literal.h"
6
#include "passgen/pattern/pattern.h"
7
#include "passgen/pattern/range.h"
8
#include "passgen/pattern/repeat.h"
9
#include "passgen/pattern/segment.h"
10
#include "passgen/pattern/segment_item.h"
11
#include "passgen/pattern/set.h"
12
#include "passgen/util/random.h"
13
#include "passgen/util/utf8.h"
14
#include "tests.h"
15
#include <passgen/passgen.h>
16
#include <stdlib.h>
17
18
/* Declares the locals shared by the parser tests: the pattern being built,
 * the parser, the tokenizer state, a scratch token, and segment/item cursors.
 * The parser is initialized with parsed_pattern. The casts to void are
 * presumably there to silence unused-variable warnings in tests that never
 * touch a cursor. The last statement is left unterminated so that the call
 * site supplies the semicolon. */
#define PREAMBLE()                           \
    passgen_pattern parsed_pattern;          \
    passgen_parser parser;                   \
    passgen_token_parser token_parser = {0}; \
    passgen_token token = {0};               \
    passgen_pattern_item *item;              \
    passgen_pattern_segment *segment;        \
    (void) segment;                          \
    (void) item;                             \
    passgen_parser_init(&parser, &parsed_pattern)
28
29
/* Finishes a test that expects a successful parse: asserts that
 * passgen_parse_finish() returns 0, then frees the parser and the pattern
 * that PREAMBLE() declared. Like the other helper macros in this file, the
 * last statement is left unterminated so that `POSTAMBLE();` expands to
 * exactly the statements below, without a stray empty statement. */
#define POSTAMBLE()                              \
    assert_eq(0, passgen_parse_finish(&parser)); \
    passgen_parser_free(&parser);                \
    passgen_pattern_free(&parsed_pattern)
33
34
/* Tokenizes a single codepoint and hands the resulting token to the parser.
 * Asserts that the tokenizer returns PASSGEN_TOKEN_INIT for the codepoint and
 * that passgen_parse_token() returns 0. Uses the locals declared by
 * PREAMBLE(); the call site supplies the trailing semicolon. */
#define PARSE_CODEPOINT(codepoint)                                   \
    assert(                                                          \
        PASSGEN_TOKEN_INIT ==                                        \
        passgen_token_parse(&token_parser, &token, 1, (codepoint))); \
    assert_eq(0, passgen_parse_token(&parser, &token))
39
40
/* Tokenizes a sequence of two codepoints (the tests in this file use it for
 * backslash escapes) and hands the resulting token to the parser. The first
 * codepoint must make the tokenizer return a positive value, the second must
 * make it return PASSGEN_TOKEN_INIT, and passgen_parse_token() must then
 * return 0. The call site supplies the trailing semicolon. */
#define PARSE_CODEPOINT_DOUBLE(a, b)                                \
    assert(0 < passgen_token_parse(&token_parser, &token, 1, (a))); \
    assert(                                                         \
        PASSGEN_TOKEN_INIT ==                                       \
        passgen_token_parse(&token_parser, &token, 1, (b)));        \
    assert_eq(0, passgen_parse_token(&parser, &token))
46
47
1
test_result test_parser_empty(void) {
48
1
    PREAMBLE();
49
1
    (void) item;
50
1
    (void) token;
51
1
    (void) token_parser;
52
1
53
1
    // single empty segment
54
1
    assert(1 == parser.pattern->group.segments.len);
55
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
56
1
    assert(0 == segment->items.len);
57
1
58
1
    POSTAMBLE();
59
1
60
1
    return test_ok;
61
1
}
62
63
1
test_result test_parser_segment_multiplier(void) {
64
1
    PREAMBLE();
65
1
    PARSE_CODEPOINT('(');
66
1
67
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
68
1
    item = passgen_pattern_segment_get_item(segment, 0);
69
1
70
1
    PARSE_CODEPOINT('{');
71
1
    PARSE_CODEPOINT('5');
72
1
    PARSE_CODEPOINT('}');
73
1
74
1
    assert_eq(item->data.group.segments.len, 1);
75
1
    assert_eq(item->data.group.multiplier_sum, 5);
76
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 0);
77
1
    assert_eq(segment->multiplier, 5);
78
1
79
1
    PARSE_CODEPOINT('a');
80
1
81
1
    assert_eq(segment->items.len, 1);
82
1
83
1
    PARSE_CODEPOINT('|');
84
1
85
1
    assert_eq(item->data.group.segments.len, 2);
86
1
    assert_eq(item->data.group.multiplier_sum, 6);
87
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 1);
88
1
    assert_eq(segment->multiplier, 1);
89
1
90
1
    PARSE_CODEPOINT('b');
91
1
    PARSE_CODEPOINT('c');
92
1
93
1
    // one item (two codepoints in a single literal)
94
1
    assert_eq(segment->items.len, 1);
95
1
96
1
    PARSE_CODEPOINT('|');
97
1
98
1
    assert_eq(item->data.group.segments.len, 3);
99
1
    assert_eq(item->data.group.multiplier_sum, 7);
100
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 2);
101
1
    assert_eq(segment->multiplier, 1);
102
1
103
1
    PARSE_CODEPOINT('{');
104
1
    PARSE_CODEPOINT('3');
105
1
    PARSE_CODEPOINT('}');
106
1
107
1
    assert_eq(item->data.group.multiplier_sum, 9);
108
1
    assert_eq(segment->multiplier, 3);
109
1
110
1
    PARSE_CODEPOINT('c');
111
1
112
1
    assert_eq(segment->items.len, 1);
113
1
114
1
    PARSE_CODEPOINT(')');
115
1
116
1
    assert_eq(item->data.group.multiplier_sum, 9);
117
1
118
1
    POSTAMBLE();
119
1
120
1
    return test_ok;
121
1
}
122
123
// when parsing a group, skip over any segments that have a zero multiplier.
124
1
test_result test_parser_skip_zero_segment(void) {
125
1
    PREAMBLE();
126
1
    PARSE_CODEPOINT('(');
127
1
128
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
129
1
    item = passgen_pattern_segment_get_item(segment, 0);
130
1
    assert_eq(item->data.group.segments.len, 1);
131
1
132
101
    for(size_t i = 0; i < 100; 
i++100
) {
133
100
        PARSE_CODEPOINT('{');
134
100
        PARSE_CODEPOINT('0');
135
100
        PARSE_CODEPOINT('}');
136
100
        PARSE_CODEPOINT('|');
137
100
138
100
        // because the multiplier is zero, this segment is removed
139
100
        assert_eq(item->data.group.segments.len, 1);
140
100
    }
141
1
142
2
    
PARSE_CODEPOINT1
('{');
143
1
    PARSE_CODEPOINT('0');
144
1
    PARSE_CODEPOINT('}');
145
1
    PARSE_CODEPOINT(')');
146
1
147
1
    // final segment is also removed, leaving no segments
148
1
    assert_eq(item->data.group.segments.len, 0);
149
1
150
1
    POSTAMBLE();
151
1
    return test_ok;
152
1
}
153
154
1
test_result test_parser_empty_group(void) {
155
1
    PREAMBLE();
156
1
    PARSE_CODEPOINT('(');
157
1
158
1
    assert(1 == parser.pattern->group.segments.len);
159
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
160
1
    assert(segment);
161
1
    assert(1 == segment->items.len);
162
1
163
1
    // group with one empty segment
164
1
    item = passgen_pattern_segment_get_item(segment, 0);
165
1
    assert(item);
166
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
167
1
    assert_eq(item->data.group.segments.len, 1);
168
1
169
1
    PARSE_CODEPOINT(')');
170
1
171
1
    assert(1 == parser.pattern->group.segments.len);
172
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
173
1
    assert(segment);
174
1
    assert(1 == segment->items.len);
175
1
176
101
    for(size_t i = 0; i < 100; 
i++100
) {
177
100
        PARSE_CODEPOINT('(');
178
100
        PARSE_CODEPOINT(')');
179
100
180
100
        assert(1 == parser.pattern->group.segments.len);
181
100
        segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
182
100
        assert(segment);
183
100
        assert(1 == segment->items.len);
184
100
    }
185
1
186
1
    POSTAMBLE();
187
1
    return test_ok;
188
1
}
189
190
1
test_result test_parser_single_char(void) {
191
1
    PREAMBLE();
192
1
    PARSE_CODEPOINT('a');
193
1
194
1
    // single segment containing char 'a'
195
1
    assert(1 == parser.pattern->group.segments.len);
196
1
197
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
198
1
    assert(segment);
199
1
    assert(1 == segment->items.len);
200
1
201
1
    item = passgen_pattern_segment_get_item(segment, 0);
202
1
    assert(item);
203
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
204
1
    assert(item->data.literal.codepoints[0] == 'a');
205
1
    assert(item->data.literal.count == 1);
206
1
207
1
    POSTAMBLE();
208
1
209
1
    return test_ok;
210
1
}
211
212
1
test_result test_parser_multi_char(void) {
213
1
    PREAMBLE();
214
1
    PARSE_CODEPOINT('a');
215
1
    PARSE_CODEPOINT('b');
216
1
217
1
    // single segment containing char 'a'
218
1
    assert(1 == parser.pattern->group.segments.len);
219
1
220
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
221
1
    assert(segment);
222
1
    assert(1 == segment->items.len);
223
1
224
1
    item = passgen_pattern_segment_get_item(segment, 0);
225
1
    assert(item);
226
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
227
1
    assert(item->data.literal.codepoints[0] == 'a');
228
1
    assert(item->data.literal.codepoints[1] == 'b');
229
1
    assert(item->data.literal.count == 2);
230
1
231
1
    POSTAMBLE();
232
1
233
1
    return test_ok;
234
1
}
235
236
1
test_result test_parser_multi_groups(void) {
237
1
    PREAMBLE();
238
1
    PARSE_CODEPOINT('a');
239
1
    PARSE_CODEPOINT('|');
240
1
    PARSE_CODEPOINT('b');
241
1
242
1
    assert(2 == parser.pattern->group.segments.len);
243
1
244
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
245
1
    assert(1 == segment->items.len);
246
1
247
1
    item = passgen_pattern_segment_get_item(segment, 0);
248
1
    assert(item);
249
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
250
1
    assert(item->data.literal.codepoints[0] == 'a');
251
1
    assert(item->data.literal.count == 1);
252
1
253
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 1);
254
1
    assert(1 == segment->items.len);
255
1
256
1
    item = passgen_pattern_segment_get_item(segment, 0);
257
1
    assert(item);
258
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
259
1
    assert(item->data.literal.codepoints[0] == 'b');
260
1
    assert(item->data.literal.count == 1);
261
1
262
1
    POSTAMBLE();
263
1
264
1
    return test_ok;
265
1
}
266
267
1
test_result test_parser_nested_groups(void) {
268
1
    PREAMBLE();
269
1
    PARSE_CODEPOINT('(');
270
1
    PARSE_CODEPOINT('a');
271
1
    PARSE_CODEPOINT(')');
272
1
273
1
    assert(1 == parser.pattern->group.segments.len);
274
1
275
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
276
1
    assert(1 == segment->items.len);
277
1
278
1
    item = passgen_pattern_segment_get_item(segment, 0);
279
1
    assert(item);
280
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
281
1
282
1
    assert(1 == item->data.group.segments.len);
283
1
284
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 0);
285
1
    assert(1 == segment->items.len);
286
1
287
1
    item = passgen_pattern_segment_get_item(segment, 0);
288
1
    assert(item);
289
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
290
1
    assert(item->data.literal.codepoints[0] == 'a');
291
1
    assert(item->data.literal.count == 1);
292
1
    assert(item->repeat.min == 1);
293
1
    assert(item->repeat.max == 1);
294
1
295
1
    POSTAMBLE();
296
1
297
1
    return test_ok;
298
1
}
299
300
1
test_result test_parser_depth_limit(void) {
301
1
    PREAMBLE();
302
1
    parser.limit = 5;
303
1
    PARSE_CODEPOINT('(');
304
1
    PARSE_CODEPOINT('(');
305
1
    PARSE_CODEPOINT('(');
306
1
    PARSE_CODEPOINT('(');
307
1
    assert(0 != passgen_parse_token(&parser, &token));
308
1
309
1
    assert(0 != passgen_parse_finish(&parser));
310
1
    passgen_parser_free(&parser);
311
1
    passgen_pattern_free(&parsed_pattern);
312
1
313
1
    return test_ok;
314
1
}
315
316
1
test_result test_parser_multi_nested_groups(void) {
317
1
    PREAMBLE();
318
1
    PARSE_CODEPOINT('(');
319
1
    PARSE_CODEPOINT('a');
320
1
    PARSE_CODEPOINT(')');
321
1
    PARSE_CODEPOINT('(');
322
1
    PARSE_CODEPOINT('b');
323
1
    PARSE_CODEPOINT(')');
324
1
325
1
    assert(1 == parser.pattern->group.segments.len);
326
1
327
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
328
1
    assert(2 == segment->items.len);
329
1
330
1
    item = passgen_pattern_segment_get_item(segment, 0);
331
1
    assert(item);
332
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
333
1
334
1
    assert(1 == item->data.group.segments.len);
335
1
336
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 0);
337
1
    assert(1 == segment->items.len);
338
1
339
1
    item = passgen_pattern_segment_get_item(segment, 0);
340
1
    assert(item);
341
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
342
1
    assert(item->data.literal.codepoints[0] == 'a');
343
1
    assert(item->data.literal.count == 1);
344
1
345
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
346
1
    item = passgen_pattern_segment_get_item(segment, 1);
347
1
    assert(item);
348
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
349
1
350
1
    assert(1 == item->data.group.segments.len);
351
1
352
1
    segment = passgen_pattern_group_segment_get(&item->data.group, 0);
353
1
    assert(1 == segment->items.len);
354
1
355
1
    item = passgen_pattern_segment_get_item(segment, 0);
356
1
    assert(item);
357
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
358
1
    assert(item->data.literal.codepoints[0] == 'b');
359
1
    assert(item->data.literal.count == 1);
360
1
361
1
    POSTAMBLE();
362
1
363
1
    return test_ok;
364
1
}
365
366
1
test_result test_parser_set_simple(void) {
367
1
    PREAMBLE();
368
1
    PARSE_CODEPOINT('[');
369
1
    PARSE_CODEPOINT('a');
370
1
    PARSE_CODEPOINT('b');
371
1
    PARSE_CODEPOINT(']');
372
1
    passgen_pattern_range *range;
373
1
374
1
    // single segment containing char 'a'
375
1
    assert(1 == parser.pattern->group.segments.len);
376
1
377
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
378
1
    assert(segment);
379
1
    assert(1 == segment->items.len);
380
1
381
1
    item = passgen_pattern_segment_get_item(segment, 0);
382
1
    assert(item);
383
1
    assert(item->kind == PASSGEN_PATTERN_SET);
384
1
385
1
    assert(item->data.set.items.len == 2);
386
1
387
1
    range = passgen_pattern_set_range_get(&item->data.set, 0);
388
1
    assert(range);
389
1
    assert(range->start == 'a');
390
1
    assert(range->end == 'a');
391
1
392
1
    range = passgen_pattern_set_range_get(&item->data.set, 1);
393
1
    assert(range);
394
1
    assert(range->start == 'b');
395
1
    assert(range->end == 'b');
396
1
397
1
    POSTAMBLE();
398
1
399
1
    return test_ok;
400
1
}
401
402
1
test_result test_parser_set_simple_escaped(void) {
403
1
    PREAMBLE();
404
1
    PARSE_CODEPOINT('[');
405
1
    PARSE_CODEPOINT('a');
406
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '-');
407
1
    PARSE_CODEPOINT('b');
408
1
    PARSE_CODEPOINT(']');
409
1
    passgen_pattern_range *range;
410
1
411
1
    // single segment containing char 'a'
412
1
    assert(1 == parser.pattern->group.segments.len);
413
1
414
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
415
1
    assert(segment);
416
1
    assert(1 == segment->items.len);
417
1
418
1
    item = passgen_pattern_segment_get_item(segment, 0);
419
1
    assert(item);
420
1
    assert(item->kind == PASSGEN_PATTERN_SET);
421
1
422
1
    assert(item->data.set.items.len == 3);
423
1
424
1
    range = passgen_pattern_set_range_get(&item->data.set, 0);
425
1
    assert(range);
426
1
    assert(range->start == 'a');
427
1
    assert(range->end == 'a');
428
1
429
1
    range = passgen_pattern_set_range_get(&item->data.set, 1);
430
1
    assert(range);
431
1
    assert(range->start == '-');
432
1
    assert(range->end == '-');
433
1
434
1
    range = passgen_pattern_set_range_get(&item->data.set, 2);
435
1
    assert(range);
436
1
    assert(range->start == 'b');
437
1
    assert(range->end == 'b');
438
1
439
1
    POSTAMBLE();
440
1
441
1
    return test_ok;
442
1
}
443
444
1
test_result test_parser_range_simple(void) {
445
1
    PREAMBLE();
446
1
    PARSE_CODEPOINT('[');
447
1
    PARSE_CODEPOINT('a');
448
1
    PARSE_CODEPOINT('-');
449
1
    PARSE_CODEPOINT('b');
450
1
    PARSE_CODEPOINT(']');
451
1
    passgen_pattern_range *range;
452
1
453
1
    // single segment containing char 'a'
454
1
    assert(1 == parser.pattern->group.segments.len);
455
1
456
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
457
1
    assert(segment);
458
1
    assert(1 == segment->items.len);
459
1
460
1
    item = passgen_pattern_segment_get_item(segment, 0);
461
1
    assert(item);
462
1
    assert(item->kind == PASSGEN_PATTERN_SET);
463
1
464
1
    assert(item->data.set.items.len == 1);
465
1
466
1
    range = passgen_pattern_set_range_get(&item->data.set, 0);
467
1
    assert(range);
468
1
    assert(range->start == 'a');
469
1
    assert(range->end == 'b');
470
1
471
1
    POSTAMBLE();
472
1
473
1
    return test_ok;
474
1
}
475
476
1
test_result test_parser_range_multiple(void) {
477
1
    PREAMBLE();
478
1
    PARSE_CODEPOINT('[');
479
1
    PARSE_CODEPOINT('a');
480
1
    PARSE_CODEPOINT('-');
481
1
    PARSE_CODEPOINT('b');
482
1
    PARSE_CODEPOINT('c');
483
1
    PARSE_CODEPOINT('-');
484
1
    PARSE_CODEPOINT('d');
485
1
    PARSE_CODEPOINT(']');
486
1
    passgen_pattern_range *range;
487
1
488
1
    // single segment containing char 'a'
489
1
    assert(1 == parser.pattern->group.segments.len);
490
1
491
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
492
1
    assert(segment);
493
1
    assert(1 == segment->items.len);
494
1
495
1
    item = passgen_pattern_segment_get_item(segment, 0);
496
1
    assert(item);
497
1
    assert(item->kind == PASSGEN_PATTERN_SET);
498
1
499
1
    assert(item->data.set.items.len == 2);
500
1
501
1
    range = passgen_pattern_set_range_get(&item->data.set, 0);
502
1
    assert(range);
503
1
    assert(range->start == 'a');
504
1
    assert(range->end == 'b');
505
1
506
1
    range = passgen_pattern_set_range_get(&item->data.set, 1);
507
1
    assert(range);
508
1
    assert(range->start == 'c');
509
1
    assert(range->end == 'd');
510
1
511
1
    POSTAMBLE();
512
1
513
1
    return test_ok;
514
1
}
515
516
1
test_result test_parser_char_repeat(void) {
517
1
    PREAMBLE();
518
1
    PARSE_CODEPOINT('a');
519
1
    PARSE_CODEPOINT('{');
520
1
    PARSE_CODEPOINT('2');
521
1
    PARSE_CODEPOINT('}');
522
1
523
1
    // single segment containing char 'a'
524
1
    assert(1 == parser.pattern->group.segments.len);
525
1
526
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
527
1
    assert(segment);
528
1
    assert(1 == segment->items.len);
529
1
530
1
    item = passgen_pattern_segment_get_item(segment, 0);
531
1
    assert(item);
532
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
533
1
    assert(item->data.literal.codepoints[0] == 'a');
534
1
    assert(item->data.literal.count == 1);
535
1
    assert(item->repeat.min == 2);
536
1
    assert(item->repeat.max == 2);
537
1
538
1
    passgen_parser_free(&parser);
539
1
    passgen_pattern_free(&parsed_pattern);
540
1
541
1
    return test_ok;
542
1
}
543
544
1
test_result test_parser_char_repeat_range(void) {
545
1
    PREAMBLE();
546
1
    PARSE_CODEPOINT('a');
547
1
    PARSE_CODEPOINT('{');
548
1
    PARSE_CODEPOINT('2');
549
1
    PARSE_CODEPOINT(',');
550
1
    PARSE_CODEPOINT('4');
551
1
    PARSE_CODEPOINT('}');
552
1
553
1
    // single segment containing char 'a'
554
1
    assert(1 == parser.pattern->group.segments.len);
555
1
556
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
557
1
    assert(segment);
558
1
    assert(1 == segment->items.len);
559
1
560
1
    item = passgen_pattern_segment_get_item(segment, 0);
561
1
    assert(item);
562
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
563
1
    assert(item->data.literal.codepoints[0] == 'a');
564
1
    assert(item->data.literal.count == 1);
565
1
    assert(item->repeat.min == 2);
566
1
    assert(item->repeat.max == 4);
567
1
568
1
    POSTAMBLE();
569
1
570
1
    return test_ok;
571
1
}
572
573
1
test_result test_parser_group_ignore_escaped(void) {
574
1
    PREAMBLE();
575
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '(');
576
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '{');
577
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '[');
578
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', '|');
579
1
580
1
    assert(1 == parser.pattern->group.segments.len);
581
1
582
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
583
1
    assert(segment);
584
1
    assert(1 == segment->items.len);
585
1
586
1
    item = passgen_pattern_segment_get_item(segment, 0);
587
1
    assert(item);
588
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
589
1
    assert(item->data.literal.codepoints[0] == '(');
590
1
    assert(item->data.literal.codepoints[1] == '{');
591
1
    assert(item->data.literal.codepoints[2] == '[');
592
1
    assert(item->data.literal.codepoints[3] == '|');
593
1
    assert(item->data.literal.count == 4);
594
1
    assert(item->repeat.min == 1);
595
1
    assert(item->repeat.max == 1);
596
1
597
1
    POSTAMBLE();
598
1
599
1
    return test_ok;
600
1
}
601
602
1
test_result test_parser_item_maybe(void) {
603
1
    PREAMBLE();
604
1
    PARSE_CODEPOINT('a');
605
1
    PARSE_CODEPOINT('a');
606
1
    PARSE_CODEPOINT('?');
607
1
    PARSE_CODEPOINT('(');
608
1
    PARSE_CODEPOINT('a');
609
1
    PARSE_CODEPOINT(')');
610
1
    PARSE_CODEPOINT('(');
611
1
    PARSE_CODEPOINT('b');
612
1
    PARSE_CODEPOINT(')');
613
1
    PARSE_CODEPOINT('?');
614
1
615
1
    // single segment containing char 'a'
616
1
    assert(1 == parser.pattern->group.segments.len);
617
1
618
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
619
1
    assert(segment);
620
1
    assert(4 == segment->items.len);
621
1
622
1
    item = passgen_pattern_segment_get_item(segment, 0);
623
1
    assert(item);
624
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
625
1
    assert(item->data.literal.codepoints[0] == 'a');
626
1
    assert(item->data.literal.count == 1);
627
1
    assert(item->repeat.min == 1);
628
1
    assert(item->repeat.max == 1);
629
1
    assert(item->maybe == false);
630
1
631
1
    item = passgen_pattern_segment_get_item(segment, 1);
632
1
    assert(item);
633
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
634
1
    assert(item->data.literal.codepoints[0] == 'a');
635
1
    assert(item->data.literal.count == 1);
636
1
    assert(item->repeat.min == 1);
637
1
    assert(item->repeat.max == 1);
638
1
    assert(item->maybe == true);
639
1
640
1
    item = passgen_pattern_segment_get_item(segment, 2);
641
1
    assert(item);
642
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
643
1
    assert(item->repeat.min == 1);
644
1
    assert(item->repeat.max == 1);
645
1
    assert(item->maybe == false);
646
1
647
1
    item = passgen_pattern_segment_get_item(segment, 3);
648
1
    assert(item);
649
1
    assert(item->kind == PASSGEN_PATTERN_GROUP);
650
1
    assert(item->repeat.min == 1);
651
1
    assert(item->repeat.max == 1);
652
1
    assert(item->maybe == true);
653
1
654
1
    POSTAMBLE();
655
1
656
1
    return test_ok;
657
1
}
658
659
1
test_result test_parser_special_pronounceable(void) {
660
1
    PREAMBLE();
661
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', 'm');
662
1
    PARSE_CODEPOINT('{');
663
1
    PARSE_CODEPOINT('e');
664
1
    PARSE_CODEPOINT('n');
665
1
    PARSE_CODEPOINT('g');
666
1
    PARSE_CODEPOINT('l');
667
1
    PARSE_CODEPOINT('i');
668
1
    PARSE_CODEPOINT('s');
669
1
    PARSE_CODEPOINT('h');
670
1
    PARSE_CODEPOINT('}');
671
1
672
1
    assert(parser.state.len == 1);
673
1
674
1
    assert(1 == parser.pattern->group.segments.len);
675
1
676
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
677
1
    assert(segment);
678
1
    assert(1 == segment->items.len);
679
1
680
1
    item = passgen_pattern_segment_get_item(segment, 0);
681
1
    assert(item);
682
1
    assert(item->kind == PASSGEN_PATTERN_SPECIAL);
683
1
    assert(item->repeat.min == 1);
684
1
    assert(item->repeat.max == 1);
685
1
    assert(item->maybe == false);
686
1
    assert(item->data.special.kind == PASSGEN_PATTERN_SPECIAL_MARKOV);
687
1
688
1
    POSTAMBLE();
689
1
690
1
    return test_ok;
691
1
}
692
693
1
test_result test_parser_mixed_special(void) {
694
1
    PREAMBLE();
695
1
    PARSE_CODEPOINT('a');
696
2
    
PARSE_CODEPOINT_DOUBLE1
('\\', 'm');
697
1
    PARSE_CODEPOINT('{');
698
1
    PARSE_CODEPOINT('e');
699
1
    PARSE_CODEPOINT('n');
700
1
    PARSE_CODEPOINT('g');
701
1
    PARSE_CODEPOINT('l');
702
1
    PARSE_CODEPOINT('i');
703
1
    PARSE_CODEPOINT('s');
704
1
    PARSE_CODEPOINT('h');
705
1
    PARSE_CODEPOINT('}');
706
1
707
1
    assert(parser.state.len == 1);
708
1
709
1
    assert(1 == parser.pattern->group.segments.len);
710
1
711
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
712
1
    assert(segment);
713
1
    assert(2 == segment->items.len);
714
1
715
1
    item = passgen_pattern_segment_get_item(segment, 0);
716
1
    assert(item);
717
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
718
1
    assert(item->data.literal.codepoints[0] == 'a');
719
1
    assert(item->data.literal.count == 1);
720
1
    assert(item->repeat.min == 1);
721
1
    assert(item->repeat.max == 1);
722
1
    assert(item->maybe == false);
723
1
724
1
    item = passgen_pattern_segment_get_item(segment, 1);
725
1
    assert(item);
726
1
    assert(item->kind == PASSGEN_PATTERN_SPECIAL);
727
1
    assert(item->repeat.min == 1);
728
1
    assert(item->repeat.max == 1);
729
1
    assert(item->maybe == false);
730
1
    assert(item->data.special.kind == PASSGEN_PATTERN_SPECIAL_MARKOV);
731
1
732
1
    POSTAMBLE();
733
1
734
1
    return test_ok;
735
1
}
736
737
1
test_result test_parser_char_maybe_char(void) {
738
1
    PREAMBLE();
739
1
    PARSE_CODEPOINT('a');
740
1
    PARSE_CODEPOINT('b');
741
1
    PARSE_CODEPOINT('c');
742
1
    PARSE_CODEPOINT('?');
743
1
    PARSE_CODEPOINT('d');
744
1
745
1
    assert(parser.state.len == 1);
746
1
747
1
    assert(1 == parser.pattern->group.segments.len);
748
1
749
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
750
1
    assert(segment);
751
1
    assert(3 == segment->items.len);
752
1
753
1
    item = passgen_pattern_segment_get_item(segment, 0);
754
1
    assert(item);
755
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
756
1
    assert(item->data.literal.count == 2);
757
1
    assert(item->data.literal.codepoints[0] == 'a');
758
1
    assert(item->data.literal.codepoints[1] == 'b');
759
1
    assert(item->data.literal.tainted == false);
760
1
    assert(item->repeat.min == 1);
761
1
    assert(item->repeat.max == 1);
762
1
    assert(item->maybe == false);
763
1
764
1
    item = passgen_pattern_segment_get_item(segment, 1);
765
1
    assert(item);
766
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
767
1
    assert(item->data.literal.count == 1);
768
1
    assert(item->data.literal.codepoints[0] == 'c');
769
1
    assert(item->data.literal.tainted == true);
770
1
    assert(item->repeat.min == 1);
771
1
    assert(item->repeat.max == 1);
772
1
    assert(item->maybe == true);
773
1
774
1
    item = passgen_pattern_segment_get_item(segment, 2);
775
1
    assert(item);
776
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
777
1
    assert(item->data.literal.count == 1);
778
1
    assert(item->data.literal.codepoints[0] == 'd');
779
1
    assert(item->data.literal.tainted == false);
780
1
    assert(item->repeat.min == 1);
781
1
    assert(item->repeat.max == 1);
782
1
    assert(item->maybe == false);
783
1
784
1
    POSTAMBLE();
785
1
786
1
    return test_ok;
787
1
}
788
789
1
test_result test_parser_char_repeat_tainted(void) {
790
1
    PREAMBLE();
791
1
    PARSE_CODEPOINT('a');
792
1
    PARSE_CODEPOINT('{');
793
1
    PARSE_CODEPOINT('3');
794
1
    PARSE_CODEPOINT('}');
795
1
    PARSE_CODEPOINT('b');
796
1
    PARSE_CODEPOINT('c');
797
1
    PARSE_CODEPOINT('d');
798
1
    PARSE_CODEPOINT('e');
799
1
    PARSE_CODEPOINT('f');
800
1
    PARSE_CODEPOINT('g');
801
1
    PARSE_CODEPOINT('h');
802
1
    PARSE_CODEPOINT('i');
803
1
    PARSE_CODEPOINT('j');
804
1
    PARSE_CODEPOINT('k');
805
1
    PARSE_CODEPOINT('{');
806
1
    PARSE_CODEPOINT('2');
807
1
    PARSE_CODEPOINT('}');
808
1
    PARSE_CODEPOINT('?');
809
1
    PARSE_CODEPOINT('a');
810
1
    PARSE_CODEPOINT('b');
811
1
812
1
    assert(parser.state.len == 1);
813
1
814
1
    assert(1 == parser.pattern->group.segments.len);
815
1
816
1
    segment = passgen_pattern_group_segment_get(&parser.pattern->group, 0);
817
1
    assert(segment);
818
1
    assert(5 == segment->items.len);
819
1
820
1
    item = passgen_pattern_segment_get_item(segment, 0);
821
1
    assert(item);
822
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
823
1
    assert(item->data.literal.count == 1);
824
1
    assert(item->data.literal.codepoints[0] == 'a');
825
1
    assert(item->data.literal.tainted == true);
826
1
    assert(item->repeat.min == 3);
827
1
    assert(item->repeat.max == 3);
828
1
    assert(item->maybe == false);
829
1
830
1
    item = passgen_pattern_segment_get_item(segment, 1);
831
1
    assert(item);
832
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
833
1
    assert(item->data.literal.count == 7);
834
1
    assert(item->data.literal.codepoints[0] == 'b');
835
1
    assert(item->data.literal.codepoints[1] == 'c');
836
1
    assert(item->data.literal.codepoints[2] == 'd');
837
1
    assert(item->data.literal.codepoints[3] == 'e');
838
1
    assert(item->data.literal.codepoints[4] == 'f');
839
1
    assert(item->data.literal.codepoints[5] == 'g');
840
1
    assert(item->data.literal.codepoints[6] == 'h');
841
1
    assert(item->data.literal.tainted == false);
842
1
    assert(item->repeat.min == 1);
843
1
    assert(item->repeat.max == 1);
844
1
    assert(item->maybe == false);
845
1
846
1
    item = passgen_pattern_segment_get_item(segment, 2);
847
1
    assert(item);
848
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
849
1
    assert(item->data.literal.count == 2);
850
1
    assert(item->data.literal.codepoints[0] == 'i');
851
1
    assert(item->data.literal.codepoints[1] == 'j');
852
1
    assert(item->data.literal.tainted == false);
853
1
    assert(item->repeat.min == 1);
854
1
    assert(item->repeat.max == 1);
855
1
    assert(item->maybe == false);
856
1
857
1
    item = passgen_pattern_segment_get_item(segment, 3);
858
1
    assert(item);
859
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
860
1
    assert(item->data.literal.count == 1);
861
1
    assert(item->data.literal.codepoints[0] == 'k');
862
1
    assert(item->data.literal.tainted == true);
863
1
    assert(item->repeat.min == 2);
864
1
    assert(item->repeat.max == 2);
865
1
    assert(item->maybe == true);
866
1
867
1
    item = passgen_pattern_segment_get_item(segment, 4);
868
1
    assert(item);
869
1
    assert(item->kind == PASSGEN_PATTERN_LITERAL);
870
1
    assert(item->data.literal.count == 2);
871
1
    assert(item->data.literal.codepoints[0] == 'a');
872
1
    assert(item->data.literal.codepoints[1] == 'b');
873
1
    assert(item->data.literal.tainted == false);
874
1
    assert(item->repeat.min == 1);
875
1
    assert(item->repeat.max == 1);
876
1
    assert(item->maybe == false);
877
1
878
1
    POSTAMBLE();
879
1
880
1
    return test_ok;
881
1
}
882
883
#undef PREAMBLE
884
#undef POSTAMBLE
885
#undef PARSE_CODEPOINT
886
#undef PARSE_CODEPOINT_DOUBLE
887
888
/// Patterns that are expected to be rejected with a parse error. The list is
/// terminated by a NULL entry.
const char *pattern_broken[] = {
    // closing a group that was never opened
    ")",
    ")))",
    "[a-z]))",
    // ranges whose end is lower than their start
    "[a-0]",
    "[z-a]",
    // ranges with a missing end
    "[a-",
    "[b-",
    "[b-]",
    "[za-",
    // groups that are never closed
    "(",
    "(()",
    // unfinished escape sequences
    "\\",
    "\\u",
    "\\u{",
    "\\u{0a",
    // unicode literals whose payload is too long
    "\\u{0000000000000",
    "\\u{123456789abcdef",
    // invalid UTF-8 sequences, taken from:
    // https://stackoverflow.com/questions/1301402/example-invalid-utf8-string
    // invalid 2-octet sequence
    "\xc3\x28",
    // invalid 3-octet sequence (in 2nd octet)
    "\xe2\x28\xa1",
    // invalid 3-octet sequence (in 3rd octet)
    "\xe2\x82\x28",
    // invalid 4-octet sequence (in 2nd octet)
    "\xf0\x28\x8c\xbc",
    // invalid 4-octet sequence (in 3rd octet)
    "\xf0\x90\x28\xbc",
    // invalid 4-octet sequence (in 4th octet)
    "\xf0\x28\x8c\x28",
    // valid 5-octet sequence (but not Unicode!)
    "\xf8\xa1\xa1\xa1\xa1",
    // valid 6-octet sequence (but not Unicode!)
    "\xfc\xa1\xa1\xa1\xa1\xa1",
    NULL,
};
933
934
/// Make sure that the parser returns an error when parsing these known broken
935
/// patterns.
936
1
test_result test_parser_parse_broken(void) {
937
26
    for(int i = 0; pattern_broken[i]; 
i++25
) {
938
25
        passgen_pattern pattern;
939
25
        passgen_error error;
940
25
        int ret = passgen_parse(&pattern, &error, pattern_broken[i]);
941
25
        passgen_pattern_free(&pattern);
942
25
        assert(ret != 0);
943
25
    }
944
1
945
1
    return test_ok;
946
1
}
947
948
/// Patterns that are known to be working.
///
/// The table ends with a NULL sentinel; `test_parser_parse_working` iterates
/// until it reaches that entry.
949
const char *pattern_working[] = {
950
    // character literal (starting with the empty pattern)
951
    "",
952
    "a",
953
    "ab",
954
    "abc",
955
    "abcd",
956
    "abcde",
957
    "abcdef",
958
    "abcdefg",
959
    "abcdefgh",
960
    "abcdefghi",
961
    "abcdefghij",
962
    // character literal repetition
963
    "a{1}",
964
    "a{9}",
965
    "a{12}",
966
    "a{12,16}",
967
    // group
968
    "()",
969
    "(a)",
970
    "(a|b)",
971
    "(a|b|c)",
972
    "(a{2}|b|c)",
973
    "(a|b|c){2}",
974
    // character range
975
    "[a]",
976
    "[abc]",
977
    "[a-z]",
978
    "[a-z0-9]",
979
    "[a-z0-9!@#$%^&*]",
980
    "[a-z]{2}",
981
    "[a-z]{2,8}",
982
    // unicode literal
983
    "\\u{0a}",
984
    "\\u{0A}",
985
    "\\u{fc}",
986
    "\\u{FC}",
987
    "\\u{00fc}",
988
    "\\u{00FC}",
989
    "\\u{10ffff}",
990
    // special escapes that take a braced name, with names of growing length
991
    "\\w{english}",
992
    "\\m{english}",
993
    "\\p{pattern}",
994
    "\\w{englishenglish}",
995
    "\\w{englishenglishenglishenglish}",
996
    "\\w{englishenglishenglishenglishenglishenglishenglish}",
997
    "\\w{veryveryveryveryveryveryveryveryveryveryveryveryveryveryvery}",
998
    // very long pattern (the adjacent string literals below are concatenated
    // by the compiler into one single table entry)
999
    "abababababababababababababababababababababababababababababababababababab"
1000
    "abababababababababababababababababababababababababababababababababababab"
1001
    "abababababababababababababababababababababababababababababababababababab"
1002
    "abababababababababababababababababababababababababababababababababababab",
1003
    // sentinel: marks the end of the table
    NULL,
1004
};
1005
1006
/// Make sure that we can parse patterns that are known to be good.
1007
1
test_result test_parser_parse_working(void) {
1008
44
    for(int i = 0; pattern_working[i]; 
i++43
) {
1009
43
        passgen_pattern pattern;
1010
43
        passgen_error error;
1011
43
        int ret = passgen_parse(&pattern, &error, pattern_working[i]);
1012
43
        assert(ret == 0);
1013
43
        passgen_pattern_free(&pattern);
1014
43
    }
1015
1
1016
1
    return test_ok;
1017
1
}
1018
1019
/// Make sure that we can parse random patterns. Some of these might be valid,
1020
/// some might not. But none of these should be able to crash the parser in any
1021
/// way.
1022
1
test_result test_parser_parse_random_selected(void) {
1023
1
    // How many random patterns to generate.
1024
1
    size_t iterations = 10000;
1025
1
    // Characters to choose from. Must be zero-terminated for `strlen` to work
1026
1
    // on it.
1027
1
    const char characters[] = "()[]|{},.abcdefghijklmnopqrstuvw0123456789\\";
1028
1
    // Find out how many possible characters there are.
1029
1
    size_t characters_len = strlen(characters);
1030
1
    // Maximum length of the string to try parsing.
1031
1
    size_t string_length = 16;
1032
1
    // Storage for the string plus NULL-terminator.
1033
1
    char string[string_length + 1];
1034
1
    // Source of randomness.
1035
1
    passgen_random *random = passgen_random_new(NULL);
1036
1
1037
1
    // Generate random strings and parse them.
1038
10.0k
    for(size_t i = 0; i < iterations; 
i++10.0k
) {
1039
10.0k
        // Determine length of random string.
1040
10.0k
        size_t length = passgen_random_u8_max(random, string_length);
1041
10.0k
        // NUL-terminate the string.
1042
10.0k
        string[length] = 0;
1043
10.0k
        // Generate random characters.
1044
85.1k
        for(size_t c = 0; c < length; 
c++75.1k
) {
1045
75.1k
            string[c] =
1046
75.1k
                characters[passgen_random_u8_max(random, characters_len)];
1047
75.1k
        }
1048
10.0k
1049
10.0k
        string[length] = 0;
1050
10.0k
1051
10.0k
        // Parse the string.
1052
10.0k
        passgen_pattern pattern;
1053
10.0k
        passgen_error error;
1054
10.0k
        passgen_parse(&pattern, &error, string);
1055
10.0k
        passgen_pattern_free(&pattern);
1056
10.0k
    }
1057
1
1058
1
    passgen_random_free(random);
1059
1
1060
1
    return test_ok;
1061
1
}
1062
1063
/// Make sure that we can parse random patterns. Some of these might be valid,
1064
/// some might not. But none of these should be able to crash the parser in any
1065
/// way.
1066
1
test_result test_parser_parse_random_ascii_printable(void) {
1067
1
    size_t iterations = 10000;
1068
1
    size_t string_length = 16;
1069
1
    char string[string_length + 1];
1070
1
    passgen_random *random = passgen_random_new(NULL);
1071
1
1072
10.0k
    for(size_t i = 0; i < iterations; 
i++10.0k
) {
1073
170k
        for(size_t c = 0; c < string_length; 
c++160k
) {
1074
160k
            string[c] = 33 + passgen_random_u8_max(random, 93);
1075
160k
        }
1076
10.0k
        string[string_length] = 0;
1077
10.0k
1078
10.0k
        // Parse the string.
1079
10.0k
        passgen_pattern pattern;
1080
10.0k
        passgen_error error;
1081
10.0k
        passgen_parse(&pattern, &error, string);
1082
10.0k
        passgen_pattern_free(&pattern);
1083
10.0k
    }
1084
1
1085
1
    passgen_random_free(random);
1086
1
1087
1
    return test_ok;
1088
1
}
1089
1090
1
test_result test_parser_parse_random_unicode(void) {
1091
1
    size_t iterations = 10000;
1092
1
    size_t string_length = 16;
1093
1
    uint32_t string[string_length];
1094
1
    passgen_random *random = passgen_random_new(NULL);
1095
1
1096
10.0k
    for(size_t i = 0; i < iterations; 
i++10.0k
) {
1097
170k
        for(size_t c = 0; c < string_length; 
c++160k
) {
1098
160k
            string[c] =
1099
160k
                33 + passgen_random_u32_max(random, PASSGEN_UNICODE_MAX);
1100
160k
        }
1101
10.0k
1102
10.0k
        // Parse the string.
1103
10.0k
        passgen_parser parser;
1104
10.0k
        passgen_pattern pattern;
1105
10.0k
        passgen_parser_init(&parser, &pattern);
1106
10.0k
        passgen_parser_unicode(&parser, string, string_length);
1107
10.0k
        passgen_pattern_free(parser.pattern);
1108
10.0k
        passgen_parser_free(&parser);
1109
10.0k
    }
1110
1
1111
1
    passgen_random_free(random);
1112
1
1113
1
    return test_ok;
1114
1
}