Line data Source code
1 : #include "passgen/wordlist.h" 2 : #include "passgen/assert.h" 3 : #include "passgen/config.h" 4 : #include "passgen/util/utf8.h" 5 : #include "try.h" 6 : #include <stdlib.h> 7 : #include <string.h> 8 : 9 : #define ALLOC_INITIAL 256 10 : #define ALLOC_INCREASE 2 11 : 12 4 : void passgen_wordlist_init( 13 : passgen_wordlist *wordlist, 14 : FILE *file, 15 : size_t markov_length) { 16 4 : wordlist->parsed = false; 17 4 : wordlist->parsed_markov = false; 18 4 : wordlist->file = file; 19 4 : wordlist->should_close_file = true; 20 4 : passgen_markov_init(&wordlist->markov, markov_length); 21 4 : } 22 : 23 3 : int passgen_wordlist_parse(passgen_wordlist *wordlist) { 24 3 : passgen_assert(!wordlist->parsed); 25 3 : wordlist->parsed = true; 26 3 : try(passgen_wordlist_read(wordlist, wordlist->file)); 27 3 : wordlist->file = NULL; 28 3 : passgen_wordlist_scan(wordlist); 29 3 : return 0; 30 : } 31 : 32 3 : void passgen_wordlist_parse_markov(passgen_wordlist *wordlist) { 33 3 : passgen_assert(!wordlist->parsed_markov); 34 3 : wordlist->parsed_markov = true; 35 : 36 3 : size_t unicode_buffer_len = 256; 37 3 : uint32_t unicode_buffer[unicode_buffer_len]; 38 685 : for(size_t i = 0; i < wordlist->count; i++) { 39 682 : uint32_t *unicode_buffer_pos = &unicode_buffer[0]; 40 682 : const uint8_t *word = (const uint8_t *) wordlist->words[i]; 41 682 : size_t word_length = strlen((const char *) word); 42 : 43 : // decode utf8 44 682 : int ret = passgen_utf8_decode( 45 : &unicode_buffer_pos, 46 : unicode_buffer_len, 47 : NULL, 48 : &word, 49 : word_length); 50 : 51 : // make sure the conversion worked 52 682 : passgen_assert(ret == PASSGEN_UTF8_SUCCESS); 53 : 54 : // add word to markov chain 55 682 : passgen_markov_add( 56 : &wordlist->markov, 57 : unicode_buffer, 58 682 : unicode_buffer_pos - &unicode_buffer[0], 59 : 1); 60 : } 61 3 : } 62 : 63 3 : void passgen_wordlist_load( 64 : passgen_wordlist *wordlist, 65 : FILE *file, 66 : size_t markov_length) { 67 3 : passgen_wordlist_init(wordlist, file, markov_length); 68 3 : passgen_wordlist_parse(wordlist); 69 3 : passgen_wordlist_parse_markov(wordlist); 70 3 : } 71 : 72 3 : int passgen_wordlist_read(passgen_wordlist *wordlist, FILE *file) { 73 : // get size of file 74 3 : fseek(file, 0, SEEK_END); 75 3 : wordlist->size = ftell(file); 76 3 : fseek(file, 0, SEEK_SET); 77 : 78 : // allocate buffer to hold entire file and read in (plus zero termination) 79 3 : wordlist->data = malloc(wordlist->size + 1); 80 3 : passgen_assert(wordlist->data); 81 : 82 : // read in entire wordlist 83 3 : size_t read = fread(wordlist->data, 1, wordlist->size, file); 84 3 : if(read != wordlist->size) { 85 0 : return 1; 86 : } 87 : 88 : // null-terminate wordlist 89 3 : wordlist->data[wordlist->size] = 0; 90 : 91 3 : if(wordlist->should_close_file) { 92 3 : fclose(file); 93 : } 94 : 95 3 : return 0; 96 : } 97 : 98 3 : void passgen_wordlist_scan(passgen_wordlist *wordlist) { 99 3 : char *state = NULL, *token; 100 : 101 3 : size_t capacity = ALLOC_INITIAL; 102 3 : wordlist->words = malloc(sizeof(char *) * capacity); 103 3 : wordlist->count = 0; 104 : 105 3 : token = strtok_r(wordlist->data, "\n", &state); 106 685 : while(token) { 107 : // expand array if needed 108 682 : if(capacity == wordlist->count) { 109 2 : capacity *= ALLOC_INCREASE; 110 2 : wordlist->words = 111 2 : realloc(wordlist->words, sizeof(char *) * capacity); 112 : } 113 : 114 : // save word 115 682 : wordlist->words[wordlist->count] = token; 116 682 : wordlist->count++; 117 : 118 : // get next word 119 682 : token = strtok_r(NULL, "\n", &state); 120 : } 121 : 122 : // resize array finally (shrink it down to what is really needed) 123 3 : wordlist->words = 124 3 : realloc(wordlist->words, sizeof(char *) * wordlist->count); 125 3 : } 126 : 127 : const char * 128 4 : passgen_wordlist_random(passgen_wordlist *wordlist, passgen_random *random) { 129 4 : if(!wordlist->parsed) { 130 1 : return NULL; 131 : } 132 3 : size_t index = passgen_random_u64_max(random, wordlist->count); 133 3 : return wordlist->words[index]; 134 : } 135 : 136 4 : void passgen_wordlist_free(passgen_wordlist *wordlist) { 137 4 : passgen_markov_free(&wordlist->markov); 138 : 139 4 : if(wordlist->parsed) { 140 3 : free(wordlist->words); 141 3 : free(wordlist->data); 142 : } 143 : 144 4 : if(wordlist->should_close_file && wordlist->file) { 145 1 : fclose(wordlist->file); 146 : } 147 : 148 4 : PASSGEN_CLEAR(wordlist); 149 4 : } 150 : 151 1 : size_t passgen_wordlist_count(passgen_wordlist *wordlist) { 152 1 : return wordlist->count; 153 : }