Line data Source code
1 : #include "passgen/wordlist.h" 2 : #include "passgen/assert.h" 3 : #include "passgen/util/utf8.h" 4 : #include "try.h" 5 : #include <stdlib.h> 6 : #include <string.h> 7 : 8 : #define ALLOC_INITIAL 256 9 : #define ALLOC_INCREASE 2 10 : 11 4 : void passgen_wordlist_init( 12 : passgen_wordlist *wordlist, 13 : FILE *file, 14 : size_t markov_length) { 15 4 : wordlist->parsed = false; 16 4 : wordlist->parsed_markov = false; 17 4 : wordlist->file = file; 18 4 : wordlist->should_close_file = true; 19 4 : passgen_markov_init(&wordlist->markov, markov_length); 20 4 : } 21 : 22 3 : int passgen_wordlist_parse(passgen_wordlist *wordlist) { 23 3 : passgen_assert(!wordlist->parsed); 24 3 : wordlist->parsed = true; 25 3 : try(passgen_wordlist_read(wordlist, wordlist->file)); 26 3 : wordlist->file = NULL; 27 3 : passgen_wordlist_scan(wordlist); 28 3 : return 0; 29 : } 30 : 31 3 : void passgen_wordlist_parse_markov(passgen_wordlist *wordlist) { 32 3 : passgen_assert(!wordlist->parsed_markov); 33 3 : wordlist->parsed_markov = true; 34 : 35 3 : size_t unicode_buffer_len = 256; 36 3 : uint32_t unicode_buffer[unicode_buffer_len]; 37 685 : for(size_t i = 0; i < wordlist->count; i++) { 38 682 : uint32_t *unicode_buffer_pos = &unicode_buffer[0]; 39 682 : const uint8_t *word = (const uint8_t *) wordlist->words[i]; 40 682 : size_t word_length = strlen((const char *) word); 41 : 42 : // decode utf8 43 682 : int ret = passgen_utf8_decode( 44 : &unicode_buffer_pos, 45 : unicode_buffer_len, 46 : NULL, 47 : &word, 48 : word_length); 49 : 50 : // make sure the conversion worked 51 682 : passgen_assert(ret == PASSGEN_UTF8_SUCCESS); 52 : 53 : // add word to markov chain 54 682 : passgen_markov_add( 55 : &wordlist->markov, 56 : unicode_buffer, 57 682 : unicode_buffer_pos - &unicode_buffer[0], 58 : 1); 59 : } 60 3 : } 61 : 62 3 : void passgen_wordlist_load( 63 : passgen_wordlist *wordlist, 64 : FILE *file, 65 : size_t markov_length) { 66 3 : passgen_wordlist_init(wordlist, file, markov_length); 67 3 : passgen_wordlist_parse(wordlist); 68 3 : passgen_wordlist_parse_markov(wordlist); 69 3 : } 70 : 71 3 : int passgen_wordlist_read(passgen_wordlist *wordlist, FILE *file) { 72 : // get size of file 73 3 : fseek(file, 0, SEEK_END); 74 3 : wordlist->size = ftell(file); 75 3 : fseek(file, 0, SEEK_SET); 76 : 77 : // allocate buffer to hold entire file and read in (plus zero termination) 78 3 : wordlist->data = malloc(wordlist->size + 1); 79 3 : passgen_assert(wordlist->data); 80 : 81 : // read in entire wordlist 82 3 : size_t read = fread(wordlist->data, 1, wordlist->size, file); 83 3 : if(read != wordlist->size) { 84 0 : return 1; 85 : } 86 : 87 : // null-terminate wordlist 88 3 : wordlist->data[wordlist->size] = 0; 89 : 90 3 : if(wordlist->should_close_file) { 91 3 : fclose(file); 92 : } 93 : 94 3 : return 0; 95 : } 96 : 97 3 : void passgen_wordlist_scan(passgen_wordlist *wordlist) { 98 3 : char *state = NULL, *token; 99 : 100 3 : size_t capacity = ALLOC_INITIAL; 101 3 : wordlist->words = malloc(sizeof(char *) * capacity); 102 3 : wordlist->count = 0; 103 : 104 3 : token = strtok_r(wordlist->data, "\n", &state); 105 685 : while(token) { 106 : // expand array if needed 107 682 : if(capacity == wordlist->count) { 108 2 : capacity *= ALLOC_INCREASE; 109 2 : wordlist->words = 110 2 : realloc(wordlist->words, sizeof(char *) * capacity); 111 : } 112 : 113 : // save word 114 682 : wordlist->words[wordlist->count] = token; 115 682 : wordlist->count++; 116 : 117 : // get next word 118 682 : token = strtok_r(NULL, "\n", &state); 119 : } 120 : 121 : // resize array finally (shrink it down to what is really needed) 122 3 : wordlist->words = 123 3 : realloc(wordlist->words, sizeof(char *) * wordlist->count); 124 3 : } 125 : 126 : const char * 127 4 : passgen_wordlist_random(passgen_wordlist *wordlist, passgen_random *random) { 128 4 : if(!wordlist->parsed) { 129 1 : return NULL; 130 : } 131 3 : size_t index = passgen_random_u64_max(random, wordlist->count); 132 3 : return wordlist->words[index]; 133 : } 134 : 135 4 : void passgen_wordlist_free(passgen_wordlist *wordlist) { 136 4 : passgen_markov_free(&wordlist->markov); 137 : 138 4 : if(wordlist->parsed) { 139 3 : free(wordlist->words); 140 3 : free(wordlist->data); 141 : } 142 : 143 4 : if(wordlist->should_close_file && wordlist->file) { 144 1 : fclose(wordlist->file); 145 : } 146 4 : } 147 : 148 1 : size_t passgen_wordlist_count(passgen_wordlist *wordlist) { 149 1 : return wordlist->count; 150 : }