/builds/xfbs/passgen/src/generate.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "passgen/generate.h" |
2 | | |
3 | | #include "passgen/assert.h" |
4 | | #include "passgen/container/hashmap.h" |
5 | | #include "passgen/container/stack.h" |
6 | | #include "passgen/markov.h" |
7 | | #include "passgen/pattern/group.h" |
8 | | #include "passgen/pattern/literal.h" |
9 | | #include "passgen/pattern/pattern.h" |
10 | | #include "passgen/pattern/range.h" |
11 | | #include "passgen/pattern/repeat.h" |
12 | | #include "passgen/pattern/segment.h" |
13 | | #include "passgen/pattern/segment_item.h" |
14 | | #include "passgen/pattern/set.h" |
15 | | #include "passgen/pattern/special.h" |
16 | | #include "passgen/util/utf8.h" |
17 | | #include "passgen/wordlist.h" |
18 | | #include "try.h" |
19 | | |
20 | | #include <string.h> |
21 | | |
22 | | typedef struct { |
23 | | passgen_env *env; |
24 | | size_t depth; |
25 | | double *entropy; |
26 | | void *data; |
27 | | passgen_generate_cb *func; |
28 | | } passgen_generate_context; |
29 | | |
30 | | // Emit a character |
31 | 145 | static inline int emit(passgen_generate_context *context, uint32_t codepoint) { |
32 | 145 | return context->func(context->data, codepoint); |
33 | 145 | } |
34 | | |
35 | | // Recurse |
36 | 90 | static inline int descend(passgen_generate_context *context) { |
37 | 90 | if(!context->depth) { |
38 | 3 | return 1; |
39 | 3 | } |
40 | 87 | |
41 | 87 | context->depth -= 1; |
42 | 87 | return 0; |
43 | 87 | } |
44 | | |
45 | 84 | static inline void ascend(passgen_generate_context *context) { |
46 | 84 | context->depth += 1; |
47 | 84 | } |
48 | | |
49 | | static size_t passgen_generate_repeat( |
50 | | passgen_generate_context *context, |
51 | | const passgen_pattern_repeat *repeat); |
52 | | |
53 | | static int passgen_generate_set( |
54 | | passgen_generate_context *context, |
55 | | const passgen_pattern_set *set); |
56 | | |
57 | | static int passgen_generate_literal( |
58 | | passgen_generate_context *context, |
59 | | const passgen_pattern_literal *character); |
60 | | |
61 | | static int passgen_generate_special_markov( |
62 | | passgen_generate_context *context, |
63 | | const passgen_pattern_special *special); |
64 | | |
65 | | static int passgen_generate_special_wordlist( |
66 | | passgen_generate_context *context, |
67 | | const passgen_pattern_special *special); |
68 | | |
69 | | static int passgen_generate_special( |
70 | | passgen_generate_context *context, |
71 | | const passgen_pattern_special *special); |
72 | | |
73 | | static int passgen_generate_group( |
74 | | passgen_generate_context *context, |
75 | | const passgen_pattern_group *group); |
76 | | |
77 | | static int passgen_generate_item( |
78 | | passgen_generate_context *context, |
79 | | const passgen_pattern_item *item); |
80 | | |
81 | | static int passgen_generate_segment( |
82 | | passgen_generate_context *context, |
83 | | const passgen_pattern_segment *segment); |
84 | | |
// Write cursor over a caller-provided array of codepoints, used as callback
// data by passgen_generate_write_buffer().
struct fillpos {
    // destination array
    uint32_t *buffer;
    // index of the next element to be written
    size_t cur;
    // number of elements available in buffer
    size_t len;
};
90 | | |
// Write cursor over a caller-provided byte array, used as callback data by
// the UTF-8 and JSON output callbacks.
struct fillpos_utf8 {
    // destination array
    uint8_t *buffer;
    // index of the next byte to be written
    size_t cur;
    // number of bytes available in buffer
    size_t len;
};
96 | | |
97 | 47 | static int passgen_generate_write_buffer(void *data, uint32_t codepoint) { |
98 | 47 | struct fillpos *fillpos = data; |
99 | 47 | |
100 | 47 | if(fillpos->cur == fillpos->len) { |
101 | 0 | return -1; |
102 | 0 | } |
103 | 47 | |
104 | 47 | fillpos->buffer[fillpos->cur] = codepoint; |
105 | 47 | fillpos->cur++; |
106 | 47 | |
107 | 47 | return 0; |
108 | 47 | } |
109 | | |
110 | 92 | static int passgen_generate_write_buffer_utf8(void *data, uint32_t codepoint) { |
111 | 92 | struct fillpos_utf8 *fillpos = data; |
112 | 92 | |
113 | 92 | if(fillpos->cur == fillpos->len) { |
114 | 0 | return -1; |
115 | 0 | } |
116 | 92 | |
117 | 92 | if((fillpos->cur + 4) <= fillpos->len) { |
118 | 92 | int bytes = passgen_utf8_encode_codepoint( |
119 | 92 | (uint8_t *) &fillpos->buffer[fillpos->cur], |
120 | 92 | codepoint); |
121 | 92 | |
122 | 92 | if(bytes < 0) { |
123 | 0 | // error happened during encoding. |
124 | 0 | return -1; |
125 | 0 | } |
126 | 92 | |
127 | 92 | fillpos->cur += bytes; |
128 | 92 | } else { |
129 | 0 | char buffer[4]; |
130 | 0 | int bytes = |
131 | 0 | passgen_utf8_encode_codepoint((uint8_t *) &buffer[0], codepoint); |
132 | 0 |
|
133 | 0 | if(bytes < 0) { |
134 | 0 | // error happened during encoding. |
135 | 0 | return -1; |
136 | 0 | } |
137 | 0 | |
138 | 0 | if(bytes <= (fillpos->len - fillpos->cur)) { |
139 | 0 | memcpy(&fillpos->buffer[fillpos->cur], &buffer[0], bytes); |
140 | 0 | fillpos->cur += bytes; |
141 | 0 | } else { |
142 | 0 | // error: encoded doesn't fit in buffer. |
143 | 0 | return -1; |
144 | 0 | } |
145 | 92 | } |
146 | 92 | |
147 | 92 | return 0; |
148 | 92 | } |
149 | | |
150 | | /// Write JSON-escaped UTF-8 to buffer. |
151 | | static int |
152 | 0 | passgen_generate_write_buffer_json_utf8(void *data, uint32_t codepoint) { |
153 | 0 | struct fillpos_utf8 *fillpos = data; |
154 | 0 |
|
155 | 0 | if(fillpos->cur == fillpos->len) { |
156 | 0 | return -1; |
157 | 0 | } |
158 | 0 | |
159 | 0 | unsigned char buffer[4] = {'\\', 0}; |
160 | 0 | size_t bytes = 2; |
161 | 0 | switch(codepoint) { |
162 | 0 | case '"': |
163 | 0 | buffer[1] = '"'; |
164 | 0 | break; |
165 | 0 | case '\\': |
166 | 0 | buffer[1] = '\\'; |
167 | 0 | break; |
168 | 0 | case '\b': |
169 | 0 | buffer[1] = 'b'; |
170 | 0 | break; |
171 | 0 | case '\f': |
172 | 0 | buffer[1] = 'f'; |
173 | 0 | break; |
174 | 0 | case '\r': |
175 | 0 | buffer[1] = 'r'; |
176 | 0 | break; |
177 | 0 | case '\n': |
178 | 0 | buffer[1] = 'n'; |
179 | 0 | break; |
180 | 0 | case '\t': |
181 | 0 | buffer[1] = 't'; |
182 | 0 | break; |
183 | 0 | default: |
184 | 0 | bytes = passgen_utf8_encode_codepoint( |
185 | 0 | (uint8_t *) &buffer[0], |
186 | 0 | codepoint); |
187 | 0 | } |
188 | 0 |
|
189 | 0 | // check that no error happened. |
190 | 0 | if(!bytes) { |
191 | 0 | return -1; |
192 | 0 | } |
193 | 0 | |
194 | 0 | // make sure it fits. |
195 | 0 | if(bytes <= (fillpos->len - fillpos->cur)) { |
196 | 0 | memcpy(&fillpos->buffer[fillpos->cur], &buffer[0], bytes); |
197 | 0 | fillpos->cur += bytes; |
198 | 0 | } else { |
199 | 0 | return -1; |
200 | 0 | } |
201 | 0 | |
202 | 0 | return 0; |
203 | 0 | } |
204 | | |
205 | | size_t passgen_generate_fill_unicode( |
206 | | const passgen_pattern *pattern, |
207 | | passgen_env *env, |
208 | | double *entropy, |
209 | | uint32_t *buffer, |
210 | 28 | size_t len) { |
211 | 28 | struct fillpos fillpos = { |
212 | 28 | .buffer = buffer, |
213 | 28 | .len = len, |
214 | 28 | .cur = 0, |
215 | 28 | }; |
216 | 28 | |
217 | 28 | try(passgen_generate( |
218 | 28 | pattern, |
219 | 28 | env, |
220 | 28 | entropy, |
221 | 28 | &fillpos, |
222 | 28 | passgen_generate_write_buffer)); |
223 | 28 | |
224 | 28 | return fillpos.cur; |
225 | 28 | } |
226 | | |
227 | | size_t passgen_generate_fill_utf8( |
228 | | const passgen_pattern *pattern, |
229 | | passgen_env *env, |
230 | | double *entropy, |
231 | | uint8_t *buffer, |
232 | 31 | size_t len) { |
233 | 31 | struct fillpos_utf8 fillpos = { |
234 | 31 | .buffer = buffer, |
235 | 31 | .len = len, |
236 | 31 | .cur = 0, |
237 | 31 | }; |
238 | 31 | |
239 | 31 | try(passgen_generate( |
240 | 31 | pattern, |
241 | 31 | env, |
242 | 31 | entropy, |
243 | 31 | &fillpos, |
244 | 31 | passgen_generate_write_buffer_utf8)); |
245 | 31 | |
246 | 31 | return fillpos.cur; |
247 | 31 | } |
248 | | |
249 | | size_t passgen_generate_fill_json_utf8( |
250 | | const passgen_pattern *pattern, |
251 | | passgen_env *env, |
252 | | double *entropy, |
253 | | uint8_t *buffer, |
254 | 0 | size_t len) { |
255 | 0 | struct fillpos_utf8 fillpos = { |
256 | 0 | .buffer = buffer, |
257 | 0 | .len = len, |
258 | 0 | .cur = 0, |
259 | 0 | }; |
260 | 0 |
|
261 | 0 | try(passgen_generate( |
262 | 0 | pattern, |
263 | 0 | env, |
264 | 0 | entropy, |
265 | 0 | &fillpos, |
266 | 0 | passgen_generate_write_buffer_json_utf8)); |
267 | 0 |
|
268 | 0 | return fillpos.cur; |
269 | 0 | } |
270 | | |
271 | | static size_t passgen_generate_repeat( |
272 | | passgen_generate_context *context, |
273 | 85 | const passgen_pattern_repeat *repeat) { |
274 | 85 | size_t difference = repeat->max - repeat->min; |
275 | 85 | |
276 | 85 | // if there is no difference to pick, just return here |
277 | 85 | if(0 == difference) { |
278 | 79 | return repeat->min; |
279 | 79 | } |
280 | 6 | |
281 | 6 | // get random number to choose from the range |
282 | 6 | size_t choice = |
283 | 6 | passgen_random_u64_max(context->env->random, difference + 1); |
284 | 6 | |
285 | 6 | // keep track of entropy |
286 | 6 | if(context->entropy) { |
287 | 4 | *context->entropy *= difference + 1; |
288 | 4 | } |
289 | 6 | |
290 | 6 | return repeat->min + choice; |
291 | 6 | } |
292 | | |
293 | | static int passgen_generate_set( |
294 | | passgen_generate_context *context, |
295 | 26 | const passgen_pattern_set *set) { |
296 | 26 | // if this set is empty, we're done. |
297 | 26 | if(set->items.len == 0) { |
298 | 0 | return 0; |
299 | 0 | } |
300 | 26 | |
301 | 26 | // compute number of possible codepoints |
302 | 26 | // TODO: generate this on the fly or on demand? |
303 | 26 | size_t possible = set->choices_list[set->items.len - 1]; |
304 | 26 | |
305 | 26 | passgen_assert(possible != 0); |
306 | 26 | |
307 | 26 | size_t choice = passgen_random_u64_max(context->env->random, possible); |
308 | 26 | |
309 | 26 | // keep track of entropy |
310 | 26 | if(context->entropy) { |
311 | 14 | *context->entropy *= possible; |
312 | 14 | } |
313 | 26 | |
314 | 26 | // locate choice in list of choices. |
315 | 26 | // TODO: binary search. |
316 | 26 | size_t num; |
317 | 44 | for(num = 0; num < set->items.len; num++18 ) { |
318 | 44 | if(choice < set->choices_list[num]) { |
319 | 26 | break; |
320 | 26 | } |
321 | 44 | } |
322 | 26 | |
323 | 26 | passgen_assert(num != set->items.len); |
324 | 26 | |
325 | 26 | /* adjust choice to be relative offset */ |
326 | 26 | if(num) { |
327 | 12 | choice -= set->choices_list[num - 1]; |
328 | 12 | } |
329 | 26 | |
330 | 26 | passgen_pattern_range *range = passgen_stack_get(&set->items, num); |
331 | 26 | |
332 | 26 | return emit(context, range->start + choice); |
333 | 26 | } |
334 | | |
335 | | static int passgen_generate_literal( |
336 | | passgen_generate_context *context, |
337 | 51 | const passgen_pattern_literal *literal) { |
338 | 51 | passgen_assert(literal->count > 0); |
339 | 51 | passgen_assert(literal->count < 8); |
340 | 51 | |
341 | 170 | for(size_t i = 0; i < literal->count; i++119 ) { |
342 | 119 | try(emit(context, literal->codepoints[i])); |
343 | 119 | } |
344 | 51 | |
345 | 51 | return 0; |
346 | 51 | } |
347 | | |
348 | | static int passgen_generate_special_markov( |
349 | | passgen_generate_context *context, |
350 | 0 | const passgen_pattern_special *special) { |
351 | 0 | passgen_hashmap_entry *entry = |
352 | 0 | passgen_hashmap_lookup(&context->env->wordlists, special->parameters); |
353 | 0 | if(!entry) { |
354 | 0 | return -1; |
355 | 0 | } |
356 | 0 | passgen_wordlist *wordlist = entry->value; |
357 | 0 | if(!wordlist->parsed) { |
358 | 0 | passgen_wordlist_parse(wordlist); |
359 | 0 | } |
360 | 0 | if(!wordlist->parsed_markov) { |
361 | 0 | passgen_wordlist_parse_markov(wordlist); |
362 | 0 | } |
363 | 0 | passgen_markov *markov = &wordlist->markov; |
364 | 0 | uint32_t word[128]; |
365 | 0 | size_t pos = markov->level; |
366 | 0 | memset(word, 0, pos * sizeof(uint32_t)); |
367 | 0 | double *entropy = context->entropy ? &*context->entropy : NULL; |
368 | 0 | do { |
369 | 0 | word[pos] = passgen_markov_generate( |
370 | 0 | markov, |
371 | 0 | &word[pos - markov->level], |
372 | 0 | context->env->random, |
373 | 0 | entropy); |
374 | 0 | pos++; |
375 | 0 | } while(word[pos - 1]); |
376 | 0 |
|
377 | 0 | pos = markov->level; |
378 | 0 | while(word[pos]) { |
379 | 0 | try(emit(context, word[pos])); |
380 | 0 | pos++; |
381 | 0 | } |
382 | 0 |
|
383 | 0 | return 0; |
384 | 0 | } |
385 | | |
386 | | static int passgen_generate_special_wordlist( |
387 | | passgen_generate_context *context, |
388 | 0 | const passgen_pattern_special *special) { |
389 | 0 | passgen_hashmap_entry *entry = |
390 | 0 | passgen_hashmap_lookup(&context->env->wordlists, special->parameters); |
391 | 0 | if(!entry) { |
392 | 0 | return -1; |
393 | 0 | } |
394 | 0 | passgen_wordlist *wordlist = entry->value; |
395 | 0 | if(!wordlist->parsed) { |
396 | 0 | passgen_wordlist_parse(wordlist); |
397 | 0 | } |
398 | 0 | const char *word = passgen_wordlist_random(wordlist, context->env->random); |
399 | 0 | while(*word) { |
400 | 0 | try(emit(context, *word)); |
401 | 0 | word++; |
402 | 0 | } |
403 | 0 |
|
404 | 0 | if(context->entropy) { |
405 | 0 | *context->entropy *= passgen_wordlist_count(wordlist); |
406 | 0 | } |
407 | 0 |
|
408 | 0 | return 0; |
409 | 0 | } |
410 | | |
411 | | static int passgen_generate_special_preset( |
412 | | passgen_generate_context *context, |
413 | 0 | const passgen_pattern_special *special) { |
414 | 0 | (void) special; |
415 | 0 | // TODO: implement |
416 | 0 | return 0; |
417 | 0 | } |
418 | | |
419 | | static int passgen_generate_special( |
420 | | passgen_generate_context *context, |
421 | 0 | const passgen_pattern_special *special) { |
422 | 0 | switch(special->kind) { |
423 | 0 | case PASSGEN_PATTERN_SPECIAL_MARKOV: |
424 | 0 | return passgen_generate_special_markov(context, special); |
425 | 0 | case PASSGEN_PATTERN_SPECIAL_WORDLIST: |
426 | 0 | return passgen_generate_special_wordlist(context, special); |
427 | 0 | case PASSGEN_PATTERN_SPECIAL_PRESET: |
428 | 0 | return passgen_generate_special_preset(context, special); |
429 | 0 | default: |
430 | 0 | return 1; |
431 | 0 | } |
432 | 0 | return 0; |
433 | 0 | } |
434 | | |
435 | | static int passgen_generate_item( |
436 | | passgen_generate_context *context, |
437 | 86 | const passgen_pattern_item *item) { |
438 | 86 | // if it is a maybe (has a question mark following it), decide first if we |
439 | 86 | // want to emit it or not. |
440 | 86 | if(item->maybe) { |
441 | 6 | if(!passgen_random_bool(context->env->random)) { |
442 | 1 | return 0; |
443 | 1 | } |
444 | 85 | } |
445 | 85 | |
446 | 85 | // compute random number of repetitions |
447 | 85 | size_t reps = passgen_generate_repeat(context, &item->repeat); |
448 | 85 | |
449 | 185 | for(size_t i = 0; i < reps; i++100 ) { |
450 | 103 | switch(item->kind) { |
451 | 26 | case PASSGEN_PATTERN_SET: |
452 | 26 | try(passgen_generate_set(context, &item->data.set)); |
453 | 26 | break; |
454 | 51 | case PASSGEN_PATTERN_LITERAL: |
455 | 51 | try(passgen_generate_literal(context, &item->data.literal)); |
456 | 51 | break; |
457 | 51 | case PASSGEN_PATTERN_SPECIAL: |
458 | 0 | try(passgen_generate_special(context, &item->data.special)); |
459 | 0 | break; |
460 | 26 | case PASSGEN_PATTERN_GROUP: |
461 | 26 | try(passgen_generate_group(context, &item->data.group)); |
462 | 26 | break23 ; |
463 | 26 | default: |
464 | 0 | passgen_assert(false); |
465 | 0 | break; |
466 | 103 | } |
467 | 103 | } |
468 | 85 | |
469 | 85 | // unreachable |
470 | 85 | return 082 ; |
471 | 85 | } |
472 | | |
473 | | static int passgen_generate_segment( |
474 | | passgen_generate_context *context, |
475 | 87 | const passgen_pattern_segment *segment) { |
476 | 170 | for(size_t i = 0; i < segment->items.len; i++83 ) { |
477 | 86 | passgen_pattern_item *item = passgen_stack_get(&segment->items, i); |
478 | 86 | |
479 | 86 | try(passgen_generate_item(context, item)); |
480 | 86 | } |
481 | 87 | |
482 | 87 | return 084 ; |
483 | 87 | } |
484 | | |
485 | | static int passgen_generate_group( |
486 | | passgen_generate_context *context, |
487 | 90 | const passgen_pattern_group *group) { |
488 | 90 | // descend in depth |
489 | 90 | try(descend(context)); |
490 | 90 | |
491 | 90 | if(87 group->multiplier_sum == 087 ) { |
492 | 0 | return 1; |
493 | 0 | } |
494 | 87 | |
495 | 87 | // choose random segment from segments |
496 | 87 | size_t choice = |
497 | 87 | passgen_random_u64_max(context->env->random, group->multiplier_sum); |
498 | 87 | size_t segment_index = 0; |
499 | 87 | passgen_pattern_segment *segment = |
500 | 87 | passgen_stack_get(&group->segments, segment_index); |
501 | 87 | |
502 | 100 | while(choice >= segment->multiplier) { |
503 | 13 | choice -= segment->multiplier; |
504 | 13 | segment_index += 1; |
505 | 13 | segment = passgen_stack_get(&group->segments, segment_index); |
506 | 13 | } |
507 | 87 | |
508 | 87 | // keep track of entropy |
509 | 87 | if(context->entropy) { |
510 | 45 | *context->entropy *= group->multiplier_sum; |
511 | 45 | *context->entropy /= segment->multiplier; |
512 | 45 | } |
513 | 87 | |
514 | 87 | try(passgen_generate_segment(context, segment)); |
515 | 87 | |
516 | 87 | ascend(context); |
517 | 84 | |
518 | 84 | return 0; |
519 | 87 | } |
520 | | |
521 | | int passgen_generate( |
522 | | const passgen_pattern *pattern, |
523 | | passgen_env *env, |
524 | | double *entropy, |
525 | | void *data, |
526 | 64 | passgen_generate_cb *func) { |
527 | 64 | // when entropy collection is request (by passing a non-NULL pointer), |
528 | 64 | // initialize it. |
529 | 64 | if(entropy) { |
530 | 31 | *entropy = 1.0; |
531 | 31 | } |
532 | 64 | |
533 | 64 | passgen_generate_context context = { |
534 | 64 | .env = env, |
535 | 64 | .depth = env->depth_limit, |
536 | 64 | .func = func, |
537 | 64 | .data = data, |
538 | 64 | .entropy = entropy, |
539 | 64 | }; |
540 | 64 | |
541 | 64 | return passgen_generate_group(&context, &pattern->group); |
542 | 64 | } |