/builds/xfbs/passgen/src/wordlist.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "passgen/wordlist.h" |
2 | | #include "passgen/assert.h" |
3 | | #include "passgen/util/utf8.h" |
4 | | #include "try.h" |
5 | | #include <stdlib.h> |
6 | | #include <string.h> |
7 | | |
/* Growth policy for the word pointer array built by passgen_wordlist_scan():
 * ALLOC_INITIAL is the starting capacity (in entries), ALLOC_INCREASE is the
 * factor the capacity is multiplied by each time the array fills up. */
enum {
    ALLOC_INITIAL = 256,
    ALLOC_INCREASE = 2
};
10 | | |
11 | | void passgen_wordlist_init( |
12 | | passgen_wordlist *wordlist, |
13 | | FILE *file, |
14 | 4 | size_t markov_length) { |
15 | 4 | wordlist->parsed = false; |
16 | 4 | wordlist->parsed_markov = false; |
17 | 4 | wordlist->file = file; |
18 | 4 | wordlist->should_close_file = true; |
19 | 4 | passgen_markov_init(&wordlist->markov, markov_length); |
20 | 4 | } |
21 | | |
22 | 3 | int passgen_wordlist_parse(passgen_wordlist *wordlist) { |
23 | 3 | passgen_assert(!wordlist->parsed); |
24 | 3 | wordlist->parsed = true; |
25 | 3 | try(passgen_wordlist_read(wordlist, wordlist->file)); |
26 | 3 | wordlist->file = NULL; |
27 | 3 | passgen_wordlist_scan(wordlist); |
28 | 3 | return 0; |
29 | 3 | } |
30 | | |
31 | 3 | void passgen_wordlist_parse_markov(passgen_wordlist *wordlist) { |
32 | 3 | passgen_assert(!wordlist->parsed_markov); |
33 | 3 | wordlist->parsed_markov = true; |
34 | 3 | |
35 | 3 | size_t unicode_buffer_len = 256; |
36 | 3 | uint32_t unicode_buffer[unicode_buffer_len]; |
37 | 685 | for(size_t i = 0; i < wordlist->count; i++682 ) { |
38 | 682 | uint32_t *unicode_buffer_pos = &unicode_buffer[0]; |
39 | 682 | const uint8_t *word = (const uint8_t *) wordlist->words[i]; |
40 | 682 | size_t word_length = strlen((const char *) word); |
41 | 682 | |
42 | 682 | // decode utf8 |
43 | 682 | int ret = passgen_utf8_decode( |
44 | 682 | &unicode_buffer_pos, |
45 | 682 | unicode_buffer_len, |
46 | 682 | NULL, |
47 | 682 | &word, |
48 | 682 | word_length); |
49 | 682 | |
50 | 682 | // make sure the conversion worked |
51 | 682 | passgen_assert(ret == PASSGEN_UTF8_SUCCESS); |
52 | 682 | |
53 | 682 | // add word to markov chain |
54 | 682 | passgen_markov_add( |
55 | 682 | &wordlist->markov, |
56 | 682 | unicode_buffer, |
57 | 682 | unicode_buffer_pos - &unicode_buffer[0], |
58 | 682 | 1); |
59 | 682 | } |
60 | 3 | } |
61 | | |
62 | | void passgen_wordlist_load( |
63 | | passgen_wordlist *wordlist, |
64 | | FILE *file, |
65 | 3 | size_t markov_length) { |
66 | 3 | passgen_wordlist_init(wordlist, file, markov_length); |
67 | 3 | passgen_wordlist_parse(wordlist); |
68 | 3 | passgen_wordlist_parse_markov(wordlist); |
69 | 3 | } |
70 | | |
71 | 3 | int passgen_wordlist_read(passgen_wordlist *wordlist, FILE *file) { |
72 | 3 | // get size of file |
73 | 3 | fseek(file, 0, SEEK_END); |
74 | 3 | wordlist->size = ftell(file); |
75 | 3 | fseek(file, 0, SEEK_SET); |
76 | 3 | |
77 | 3 | // allocate buffer to hold entire file and read in (plus zero termination) |
78 | 3 | wordlist->data = malloc(wordlist->size + 1); |
79 | 3 | passgen_assert(wordlist->data); |
80 | 3 | |
81 | 3 | // read in entire wordlist |
82 | 3 | size_t read = fread(wordlist->data, 1, wordlist->size, file); |
83 | 3 | if(read != wordlist->size) { |
84 | 0 | return 1; |
85 | 0 | } |
86 | 3 | |
87 | 3 | // null-terminate wordlist |
88 | 3 | wordlist->data[wordlist->size] = 0; |
89 | 3 | |
90 | 3 | if(wordlist->should_close_file) { |
91 | 3 | fclose(file); |
92 | 3 | } |
93 | 3 | |
94 | 3 | return 0; |
95 | 3 | } |
96 | | |
97 | 3 | void passgen_wordlist_scan(passgen_wordlist *wordlist) { |
98 | 3 | char *state = NULL, *token; |
99 | 3 | |
100 | 3 | size_t capacity = ALLOC_INITIAL; |
101 | 3 | wordlist->words = malloc(sizeof(char *) * capacity); |
102 | 3 | wordlist->count = 0; |
103 | 3 | |
104 | 3 | token = strtok_r(wordlist->data, "\n", &state); |
105 | 685 | while(token) { |
106 | 682 | // expand array if needed |
107 | 682 | if(capacity == wordlist->count) { |
108 | 2 | capacity *= ALLOC_INCREASE; |
109 | 2 | wordlist->words = |
110 | 2 | realloc(wordlist->words, sizeof(char *) * capacity); |
111 | 2 | } |
112 | 682 | |
113 | 682 | // save word |
114 | 682 | wordlist->words[wordlist->count] = token; |
115 | 682 | wordlist->count++; |
116 | 682 | |
117 | 682 | // get next word |
118 | 682 | token = strtok_r(NULL, "\n", &state); |
119 | 682 | } |
120 | 3 | |
121 | 3 | // resize array finally (shrink it down to what is really needed) |
122 | 3 | wordlist->words = |
123 | 3 | realloc(wordlist->words, sizeof(char *) * wordlist->count); |
124 | 3 | } |
125 | | |
126 | | const char * |
127 | 4 | passgen_wordlist_random(passgen_wordlist *wordlist, passgen_random *random) { |
128 | 4 | if(!wordlist->parsed) { |
129 | 1 | return NULL; |
130 | 1 | } |
131 | 3 | size_t index = passgen_random_u64_max(random, wordlist->count); |
132 | 3 | return wordlist->words[index]; |
133 | 3 | } |
134 | | |
135 | 4 | void passgen_wordlist_free(passgen_wordlist *wordlist) { |
136 | 4 | passgen_markov_free(&wordlist->markov); |
137 | 4 | |
138 | 4 | if(wordlist->parsed) { |
139 | 3 | free(wordlist->words); |
140 | 3 | free(wordlist->data); |
141 | 3 | } |
142 | 4 | |
143 | 4 | if(wordlist->should_close_file && wordlist->file) { |
144 | 1 | fclose(wordlist->file); |
145 | 1 | } |
146 | 4 | } |
147 | | |
148 | 1 | size_t passgen_wordlist_count(passgen_wordlist *wordlist) { |
149 | 1 | return wordlist->count; |
150 | 1 | } |