Coverage Report

Created: 2024-05-03 06:05

/builds/xfbs/passgen/src/parser/parser.c
Line
Count
Source (jump to first uncovered line)
1
#include "passgen/parser/parser.h"
2
3
#include <stdbool.h>
4
#include <stddef.h>
5
#include <stdint.h>
6
#include <stdlib.h>
7
8
#include "passgen/container/stack.h"
9
#include "passgen/pattern/group.h"
10
#include "passgen/pattern/literal.h"
11
#include "passgen/pattern/pattern.h"
12
#include "passgen/pattern/range.h"
13
#include "passgen/pattern/repeat.h"
14
#include "passgen/pattern/segment.h"
15
#include "passgen/pattern/segment_item.h"
16
#include "passgen/pattern/set.h"
17
18
int passgen_parse_group(
19
    struct passgen_parser *parser,
20
    struct passgen_token *token,
21
    passgen_parser_state *state);
22
23
int passgen_parse_multiplier(
24
    struct passgen_parser *parser,
25
    struct passgen_token *token,
26
    passgen_parser_state *state);
27
28
int passgen_parse_set(
29
    struct passgen_parser *parser,
30
    struct passgen_token *token,
31
    passgen_parser_state *state);
32
33
int passgen_parse_set_range(
34
    struct passgen_parser *parser,
35
    struct passgen_token *token,
36
    passgen_parser_state *state);
37
38
int passgen_parse_repeat(
39
    struct passgen_parser *parser,
40
    struct passgen_token *token,
41
    passgen_parser_state *state);
42
43
int passgen_parse_repeat_range(
44
    struct passgen_parser *parser,
45
    struct passgen_token *token,
46
    passgen_parser_state *state);
47
48
int passgen_parse_special(
49
    struct passgen_parser *parser,
50
    struct passgen_token *token,
51
    passgen_parser_state *state);
52
53
int passgen_parse_special_name(
54
    struct passgen_parser *parser,
55
    struct passgen_token *token,
56
    passgen_parser_state *state);
57
58
39.8k
// Push a new entry onto the parser's state stack and return a pointer to
// it, as handed back by passgen_stack_push (called with a NULL value
// argument). The fields of the returned state are not set here; callers
// fill them in.
//
// Deliberately not declared `inline`: a non-static inline definition
// without an accompanying extern declaration does not provide an external
// definition in C99/C11, which can lead to link errors when the call is
// not inlined.
passgen_parser_state *passgen_parser_state_push(passgen_parser *parser) {
    return passgen_stack_push(&parser->state, NULL);
}
61
62
passgen_parser_state *passgen_parser_state_push_group(
63
    passgen_parser *parser,
64
    passgen_pattern_group *group,
65
33.3k
    passgen_pattern_segment *segment) {
66
33.3k
    passgen_parser_state *state = passgen_parser_state_push(parser);
67
33.3k
    state->type = PASSGEN_PARSER_GROUP;
68
33.3k
    state->data.group.group = group;
69
33.3k
    state->data.group.segment = segment;
70
33.3k
71
33.3k
    return state;
72
33.3k
}
73
74
passgen_parser_state *passgen_parser_state_push_set(
75
    passgen_parser *parser,
76
    passgen_pattern_set *set,
77
3.09k
    passgen_pattern_range *range) {
78
3.09k
    passgen_parser_state *state = passgen_parser_state_push(parser);
79
3.09k
    state->type = PASSGEN_PARSER_SET;
80
3.09k
    state->data.set.set = set;
81
3.09k
    state->data.set.range = range;
82
3.09k
83
3.09k
    return state;
84
3.09k
}
85
86
passgen_parser_state *passgen_parser_state_push_repeat(
87
    passgen_parser *parser,
88
2.71k
    passgen_pattern_repeat *repeat) {
89
2.71k
    passgen_parser_state *state = passgen_parser_state_push(parser);
90
2.71k
    state->type = PASSGEN_PARSER_REPEAT;
91
2.71k
    repeat->min = 0;
92
2.71k
    repeat->max = 0;
93
2.71k
    state->data.repeat.repeat = repeat;
94
2.71k
95
2.71k
    return state;
96
2.71k
}
97
98
passgen_parser_state *passgen_parser_state_push_multiplier(
99
    passgen_parser *parser,
100
    size_t *multiplier,
101
516
    size_t *sum) {
102
516
    passgen_parser_state *state = passgen_parser_state_push(parser);
103
516
    state->type = PASSGEN_PARSER_MULTIPLIER;
104
516
    *multiplier = 0;
105
516
    state->data.multiplier.value = multiplier;
106
516
    state->data.multiplier.sum = sum;
107
516
    return state;
108
516
}
109
110
passgen_parser_state *passgen_parser_state_push_special(
111
    passgen_parser *parser,
112
135
    passgen_pattern_special *special) {
113
135
    passgen_parser_state *state = passgen_parser_state_push(parser);
114
135
    state->type = PASSGEN_PARSER_SPECIAL;
115
135
    state->data.special.special = special;
116
135
    return state;
117
135
}
118
119
30.1k
// Initialise `parser` so that it parses into `pattern`.
//
// When `pattern` is NULL a pattern is allocated with malloc instead. In
// both cases the pattern is handed back to the caller by
// passgen_parser_free. The state stack starts with a single GROUP state
// for the pattern's top-level group and a freshly appended first segment.
void passgen_parser_init(passgen_parser *parser, passgen_pattern *pattern) {
    passgen_stack_init(&parser->state, sizeof(passgen_parser_state));
    parser->limit = 0;

    // NOTE(review): the malloc result is not checked; an allocation
    // failure is passed straight on to passgen_pattern_init.
    parser->pattern = pattern ? pattern : malloc(sizeof(passgen_pattern));
    passgen_pattern_init(parser->pattern);

    // the root state parses into the pattern's top-level group
    passgen_pattern_group *root = &parser->pattern->group;
    passgen_pattern_segment *first = passgen_pattern_group_segment_append(root);
    passgen_parser_state_push_group(parser, root, first);
}
132
133
30.1k
// Release the parser's state stack and detach the pattern from the parser.
// Returns the pattern that was attached (the parser's pattern pointer is
// set to NULL), so the caller takes over the pattern from here.
passgen_pattern *passgen_parser_free(passgen_parser *parser) {
    passgen_pattern *detached = parser->pattern;

    passgen_stack_free(&parser->state);
    parser->pattern = NULL;

    return detached;
}
139
140
passgen_parser_state *
141
0
passgen_parser_state_get(passgen_parser *parser, size_t n) {
142
0
    return passgen_stack_get(&parser->state, n);
143
0
}
144
145
351k
// Return the entry at the top of the parser's state stack, i.e. the state
// that the next token is dispatched to.
passgen_parser_state *passgen_parser_state_last(passgen_parser *parser) {
    passgen_parser_state *top = passgen_stack_top(&parser->state);
    return top;
}
148
149
// get the last item, making sure that it's only a single character.
150
// in case of characters, mark it as tainted.
151
static inline passgen_pattern_item *
152
5.05k
last_single_item_taint(passgen_pattern_segment *segment) {
153
5.05k
    passgen_pattern_item *item = passgen_stack_top(&segment->items);
154
5.05k
155
5.05k
    if(!item) {
156
686
        return NULL;
157
686
    }
158
4.36k
159
4.36k
    if(item->kind == PASSGEN_PATTERN_LITERAL) {
160
4.33k
        if(item->data.literal.count > 1) {
161
3.30k
            // save last codepoint
162
3.30k
            int32_t codepoint =
163
3.30k
                item->data.literal.codepoints[item->data.literal.count - 1];
164
3.30k
165
3.30k
            // trim codepoints
166
3.30k
            item->data.literal.count -= 1;
167
3.30k
168
3.30k
            // create new item
169
3.30k
            item = passgen_pattern_segment_new_item(segment);
170
3.30k
            item->kind = PASSGEN_PATTERN_LITERAL;
171
3.30k
            passgen_pattern_literal_init(&item->data.literal);
172
3.30k
            passgen_pattern_literal_append(&item->data.literal, codepoint);
173
3.30k
        }
174
4.33k
175
4.33k
        // characters are always marked as tainted.
176
4.33k
        passgen_pattern_literal_taint(&item->data.literal);
177
4.33k
    }
178
4.36k
179
4.36k
    return item;
180
4.36k
}
181
182
341k
// Feed a single token to the parser by dispatching it to the handler that
// matches the state on top of the state stack.
//
// Returns the handler's result (0 on success, -1 on error). Also returns
// -1 when the state stack is empty, when a non-zero `parser->limit` is
// reached by the stack depth, or when the state type is unknown.
int passgen_parse_token(passgen_parser *parser, passgen_token *token) {
    passgen_parser_state *state = passgen_parser_state_last(parser);

    // the top of an empty stack is NULL (e.g. when the parser is used after
    // passgen_parser_free); refuse to dereference it.
    if(!state) {
        return -1;
    }

    // a limit of zero means that the nesting depth is not limited
    if(parser->limit && parser->state.len >= parser->limit) {
        return -1;
    }

    switch(state->type) {
        case PASSGEN_PARSER_GROUP:
            return passgen_parse_group(parser, token, state);
        case PASSGEN_PARSER_MULTIPLIER:
            return passgen_parse_multiplier(parser, token, state);
        case PASSGEN_PARSER_SET:
            return passgen_parse_set(parser, token, state);
        case PASSGEN_PARSER_SET_RANGE:
            return passgen_parse_set_range(parser, token, state);
        case PASSGEN_PARSER_REPEAT:
            return passgen_parse_repeat(parser, token, state);
        case PASSGEN_PARSER_REPEAT_RANGE:
            return passgen_parse_repeat_range(parser, token, state);
        case PASSGEN_PARSER_SPECIAL:
            return passgen_parse_special(parser, token, state);
        case PASSGEN_PARSER_SPECIAL_NAME:
            return passgen_parse_special_name(parser, token, state);
        default:
            return -1;
    }
}
210
211
327k
// Remove the last item of `segment` if it is a group whose multiplier sum
// is zero: the group is freed and popped from the segment's items. Any
// other segment is left untouched.
static void passgen_pattern_segment_clean(passgen_pattern_segment *segment) {
    if(segment->items.len == 0) {
        return;
    }

    passgen_pattern_item *tail = passgen_stack_top(&segment->items);

    if(tail->kind != PASSGEN_PATTERN_GROUP) {
        return;
    }

    if(tail->data.group.multiplier_sum != 0) {
        return;
    }

    passgen_pattern_group_free(&tail->data.group);
    passgen_stack_pop(&segment->items, NULL);
}
223
224
int passgen_parse_group(
225
    passgen_parser *parser,
226
    passgen_token *token,
227
317k
    passgen_parser_state *state) {
228
317k
    uint32_t codepoint = token->codepoint;
229
317k
    passgen_pattern_group *group;
230
317k
    passgen_pattern_special *special;
231
317k
    passgen_pattern_item *item;
232
317k
233
317k
    passgen_pattern_segment_clean(state->data.group.segment);
234
317k
235
317k
    if(codepoint & PASSGEN_TOKEN_ESCAPED_BIT) {
236
1.93k
        codepoint &= ~PASSGEN_TOKEN_ESCAPED_BIT;
237
1.93k
        switch((char) codepoint) {
238
280
            case '|':
239
280
            case '(':
240
280
            case ')':
241
280
            case '{':
242
280
            case '}':
243
280
            case '[':
244
280
            case ']':
245
280
                // escaped token which would normally do something but
246
280
                // should be treated as text
247
280
                break;
248
280
            case 'm':
249
135
            case 'p':
250
135
            case 'w':
251
135
                // special token
252
135
                special = passgen_pattern_segment_new_special(
253
135
                    state->data.group.segment);
254
135
                passgen_pattern_special_init(special, (char) codepoint);
255
135
                passgen_parser_state_push_special(parser, special);
256
135
                return 0;
257
1.51k
            default:
258
1.51k
                // error
259
1.51k
                return -1;
260
315k
        }
261
315k
    } else {
262
315k
        switch((char) codepoint) {
263
3.21k
            case '|':
264
3.21k
                if(state->data.group.segment->multiplier > 0) {
265
3.11k
                    // create new segment and parser state
266
3.11k
                    state->data.group.segment =
267
3.11k
                        passgen_pattern_group_segment_append(
268
3.11k
                            state->data.group.group);
269
3.11k
                } else {
270
101
                    // if the previous segment had a zero multiplier, recycle it
271
101
                    passgen_pattern_segment_free(state->data.group.segment);
272
101
                    passgen_pattern_segment_init(state->data.group.segment);
273
101
                }
274
3.21k
                return 0;
275
3.29k
            case ')':
276
3.29k
                if(state->data.group.segment->multiplier == 0) {
277
1
                    passgen_pattern_segment_free(state->data.group.segment);
278
1
                    passgen_stack_pop(&state->data.group.group->segments, NULL);
279
1
                }
280
3.29k
                if(parser->state.len <= 1) {
281
2.93k
                    return -1;
282
2.93k
                }
283
362
                passgen_pattern_group_finish(state->data.group.group);
284
362
                passgen_stack_pop(&parser->state, NULL);
285
362
                return 0;
286
3.24k
            case '(':
287
3.24k
                // we're supposed to read something in.
288
3.24k
                group = passgen_pattern_segment_new_group(
289
3.24k
                    state->data.group.segment);
290
3.24k
                passgen_parser_state_push_group(
291
3.24k
                    parser,
292
3.24k
                    group,
293
3.24k
                    passgen_pattern_group_segment_append(group));
294
3.24k
                return 0;
295
3.09k
            case '[':
296
3.09k
                passgen_parser_state_push_set(
297
3.09k
                    parser,
298
3.09k
                    passgen_pattern_segment_new_set(state->data.group.segment),
299
3.09k
                    NULL);
300
3.09k
                return 0;
301
3.22k
            case '{':
302
3.22k
                item = last_single_item_taint(state->data.group.segment);
303
3.22k
                // error, there was no item
304
3.22k
                if(item) {
305
2.71k
                    passgen_parser_state_push_repeat(parser, &item->repeat);
306
2.71k
                    return 0;
307
2.71k
                } else {
308
516
                    state->data.group.segment->multiplier = 0;
309
516
                    state->data.group.group->multiplier_sum -= 1;
310
516
                    passgen_parser_state_push_multiplier(
311
516
                        parser,
312
516
                        &state->data.group.segment->multiplier,
313
516
                        &state->data.group.group->multiplier_sum);
314
516
                    return 0;
315
516
                }
316
1.82k
            case '?':
317
1.82k
                item = last_single_item_taint(state->data.group.segment);
318
1.82k
                if(item) {
319
1.65k
                    item->maybe = true;
320
1.65k
                    return 0;
321
1.65k
                } else {
322
170
                    // error: maybe without a previous item
323
170
                    return -1;
324
170
                }
325
298k
            default:
326
298k
                break;
327
298k
        }
328
298k
    }
329
298k
330
298k
    // check if the last item was a character that we can add this one to
331
298k
    if(state->data.group.segment->items.len) {
332
265k
        passgen_pattern_item *last =
333
265k
            passgen_stack_top(&state->data.group.segment->items);
334
265k
335
265k
        if(last->kind == PASSGEN_PATTERN_LITERAL) {
336
265k
            if(0 ==
337
265k
               passgen_pattern_literal_append(&last->data.literal, codepoint)) {
338
236k
                return 0;
339
236k
            }
340
62.1k
        }
341
265k
    }
342
62.1k
343
62.1k
    passgen_pattern_literal *literal =
344
62.1k
        passgen_pattern_segment_new_char(state->data.group.segment);
345
62.1k
    passgen_pattern_literal_append(literal, codepoint);
346
62.1k
347
62.1k
    return 0;
348
62.1k
}
349
350
int passgen_parse_set(
351
    passgen_parser *parser,
352
    passgen_token *token,
353
20.0k
    passgen_parser_state *state) {
354
20.0k
    passgen_pattern_set *set = state->data.set.set;
355
20.0k
356
20.0k
    // this set's over
357
20.0k
    if(token->codepoint == ']') {
358
302
        // compute sum of choices and choices list for binary search.
359
302
        size_t choices = 0;
360
302
        set->choices_list = malloc(sizeof(size_t) * set->items.len);
361
1.26k
        for(size_t i = 0; i < set->items.len; 
i++958
) {
362
958
            passgen_pattern_range *range = passgen_stack_get(&set->items, i);
363
958
            choices += 1 + range->end - range->start;
364
958
            set->choices_list[i] = choices;
365
958
        }
366
302
367
302
        passgen_stack_pop(&parser->state, NULL);
368
302
        return 0;
369
302
    }
370
19.7k
371
19.7k
    // part of a range expression
372
19.7k
    if(state->data.set.range && 
token->codepoint == '-'16.8k
) {
373
117
        state->type = PASSGEN_PARSER_SET_RANGE;
374
117
        return 0;
375
117
    }
376
19.5k
377
19.5k
    passgen_pattern_range *range = passgen_pattern_set_range_append(set);
378
19.5k
379
19.5k
    range->start = token->codepoint & ~PASSGEN_TOKEN_ESCAPED_BIT;
380
19.5k
    range->end = token->codepoint & ~PASSGEN_TOKEN_ESCAPED_BIT;
381
19.5k
382
19.5k
    state->data.set.range = range;
383
19.5k
384
19.5k
    return 0;
385
19.5k
}
386
387
int passgen_parse_set_range(
388
    passgen_parser *parser,
389
    passgen_token *token,
390
97
    passgen_parser_state *state) {
391
97
    (void) parser;
392
97
    if(token->codepoint == ']') {
393
1
        return -1;
394
1
    }
395
96
396
96
    if(token->codepoint < state->data.set.range->start) {
397
40
        return -1;
398
40
    }
399
56
400
56
    state->data.set.range->end = token->codepoint;
401
56
    state->type = PASSGEN_PARSER_SET;
402
56
403
56
    return 0;
404
56
}
405
406
int passgen_parse_multiplier(
407
    passgen_parser *parser,
408
    passgen_token *token,
409
663
    passgen_parser_state *state) {
410
663
    if(token->codepoint == '}') {
411
111
        *state->data.multiplier.sum += *state->data.multiplier.value;
412
111
        passgen_stack_pop(&parser->state, NULL);
413
111
        return 0;
414
111
    }
415
552
416
552
    if(token->codepoint >= '0' && 
token->codepoint <= '9'496
) {
417
174
        uint8_t digit = token->codepoint - '0';
418
174
419
174
        *state->data.multiplier.value *= 10;
420
174
        *state->data.multiplier.value += digit;
421
174
422
174
        return 0;
423
174
    }
424
378
425
378
    return -1;
426
378
}
427
428
int passgen_parse_repeat(
429
    passgen_parser *parser,
430
    passgen_token *token,
431
2.86k
    passgen_parser_state *state) {
432
2.86k
    // this set's over
433
2.86k
    if(token->codepoint == '}') {
434
46
        state->data.repeat.repeat->max = state->data.repeat.repeat->min;
435
46
        passgen_stack_pop(&parser->state, NULL);
436
46
        return 0;
437
46
    }
438
2.81k
439
2.81k
    if(token->codepoint == ',') {
440
31
        state->data.repeat.repeat->max = 0;
441
31
        state->type = PASSGEN_PARSER_REPEAT_RANGE;
442
31
        return 0;
443
31
    }
444
2.78k
445
2.78k
    if(token->codepoint >= '0' && 
token->codepoint <= '9'2.53k
) {
446
421
        uint8_t digit = token->codepoint - '0';
447
421
448
421
        state->data.repeat.repeat->min *= 10;
449
421
        state->data.repeat.repeat->min += digit;
450
421
451
421
        return 0;
452
421
    }
453
2.36k
454
2.36k
    return -1;
455
2.36k
}
456
457
int passgen_parse_repeat_range(
458
    passgen_parser *parser,
459
    passgen_token *token,
460
41
    passgen_parser_state *state) {
461
41
    if(token->codepoint == '}') {
462
9
        passgen_stack_pop(&parser->state, NULL);
463
9
        return 0;
464
9
    }
465
32
466
32
    if(token->codepoint >= '0' && 
token->codepoint <= '9'27
) {
467
13
        uint8_t digit = token->codepoint - '0';
468
13
469
13
        state->data.repeat.repeat->max *= 10;
470
13
        state->data.repeat.repeat->max += digit;
471
13
472
13
        return 0;
473
13
    }
474
19
475
19
    return -1;
476
19
}
477
478
int passgen_parse_special(
479
    passgen_parser *parser,
480
    passgen_token *token,
481
120
    passgen_parser_state *state) {
482
120
    (void) parser;
483
120
484
120
    if(token->codepoint == '{') {
485
10
        state->type = PASSGEN_PARSER_SPECIAL_NAME;
486
10
        return 0;
487
10
    }
488
110
489
110
    return -1;
490
110
}
491
492
int passgen_parse_special_name(
493
    passgen_parser *parser,
494
    passgen_token *token,
495
200
    passgen_parser_state *state) {
496
200
    (void) parser;
497
200
498
200
    if(token->codepoint == '}') {
499
10
        passgen_stack_pop(&parser->state, NULL);
500
190
    } else {
501
190
        passgen_pattern_special_push(
502
190
            state->data.special.special,
503
190
            token->codepoint & ~PASSGEN_TOKEN_ESCAPED_BIT);
504
190
    }
505
200
506
200
    return 0;
507
200
}
508
509
13.5k
// Finish parsing. Succeeds (returns 0) only when exactly one state is left
// on the stack and that state is a GROUP state; its current segment is
// then cleaned. Returns -1 otherwise, e.g. when a group, set or repeat is
// still open.
int passgen_parse_finish(passgen_parser *parser) {
    // anything but the single initial state means something is still open
    if(parser->state.len != 1) {
        return -1;
    }

    passgen_parser_state *root = passgen_parser_state_last(parser);

    // the remaining state has to be a group
    if(root->type != PASSGEN_PARSER_GROUP) {
        return -1;
    }

    // clean the segment of the remaining state
    passgen_pattern_segment_clean(root->data.group.segment);

    return 0;
}
526
527
int passgen_parser_unicode(
528
    passgen_parser *parser,
529
    uint32_t *data,
530
10.0k
    size_t length) {
531
10.0k
    passgen_token_parser token_parser = {0};
532
10.0k
    passgen_token token = {0};
533
10.0k
    int ret;
534
10.0k
535
159k
    for(size_t pos = 0; pos < length; 
pos++149k
) {
536
150k
        ret = passgen_token_parse(&token_parser, &token, 1, data[pos]);
537
150k
538
150k
        if(ret == PASSGEN_TOKEN_INIT) {
539
150k
            ret = passgen_parse_token(parser, &token);
540
150k
541
150k
            if(ret != 0) {
542
1.14k
                return ret;
543
1.14k
            }
544
150k
        }
545
150k
    }
546
10.0k
547
10.0k
    
return 08.85k
;
548
10.0k
}