Coverage Report

Created: 2024-05-03 06:05

/builds/xfbs/passgen/src/wordlist.c
Line
Count
Source (jump to first uncovered line)
1
#include "passgen/wordlist.h"
2
#include "passgen/assert.h"
3
#include "passgen/util/utf8.h"
4
#include "try.h"
5
#include <stdlib.h>
6
#include <string.h>
7
8
3
#define ALLOC_INITIAL  256
9
2
#define ALLOC_INCREASE 2
10
11
void passgen_wordlist_init(
12
    passgen_wordlist *wordlist,
13
    FILE *file,
14
4
    size_t markov_length) {
15
4
    wordlist->parsed = false;
16
4
    wordlist->parsed_markov = false;
17
4
    wordlist->file = file;
18
4
    wordlist->should_close_file = true;
19
4
    passgen_markov_init(&wordlist->markov, markov_length);
20
4
}
21
22
3
int passgen_wordlist_parse(passgen_wordlist *wordlist) {
23
3
    passgen_assert(!wordlist->parsed);
24
3
    wordlist->parsed = true;
25
3
    try(passgen_wordlist_read(wordlist, wordlist->file));
26
3
    wordlist->file = NULL;
27
3
    passgen_wordlist_scan(wordlist);
28
3
    return 0;
29
3
}
30
31
3
void passgen_wordlist_parse_markov(passgen_wordlist *wordlist) {
32
3
    passgen_assert(!wordlist->parsed_markov);
33
3
    wordlist->parsed_markov = true;
34
3
35
3
    size_t unicode_buffer_len = 256;
36
3
    uint32_t unicode_buffer[unicode_buffer_len];
37
685
    for(size_t i = 0; i < wordlist->count; 
i++682
) {
38
682
        uint32_t *unicode_buffer_pos = &unicode_buffer[0];
39
682
        const uint8_t *word = (const uint8_t *) wordlist->words[i];
40
682
        size_t word_length = strlen((const char *) word);
41
682
42
682
        // decode utf8
43
682
        int ret = passgen_utf8_decode(
44
682
            &unicode_buffer_pos,
45
682
            unicode_buffer_len,
46
682
            NULL,
47
682
            &word,
48
682
            word_length);
49
682
50
682
        // make sure the conversion worked
51
682
        passgen_assert(ret == PASSGEN_UTF8_SUCCESS);
52
682
53
682
        // add word to markov chain
54
682
        passgen_markov_add(
55
682
            &wordlist->markov,
56
682
            unicode_buffer,
57
682
            unicode_buffer_pos - &unicode_buffer[0],
58
682
            1);
59
682
    }
60
3
}
61
62
void passgen_wordlist_load(
63
    passgen_wordlist *wordlist,
64
    FILE *file,
65
3
    size_t markov_length) {
66
3
    passgen_wordlist_init(wordlist, file, markov_length);
67
3
    passgen_wordlist_parse(wordlist);
68
3
    passgen_wordlist_parse_markov(wordlist);
69
3
}
70
71
3
int passgen_wordlist_read(passgen_wordlist *wordlist, FILE *file) {
72
3
    // get size of file
73
3
    fseek(file, 0, SEEK_END);
74
3
    wordlist->size = ftell(file);
75
3
    fseek(file, 0, SEEK_SET);
76
3
77
3
    // allocate buffer to hold entire file and read in (plus zero termination)
78
3
    wordlist->data = malloc(wordlist->size + 1);
79
3
    passgen_assert(wordlist->data);
80
3
81
3
    // read in entire wordlist
82
3
    size_t read = fread(wordlist->data, 1, wordlist->size, file);
83
3
    if(read != wordlist->size) {
84
0
        return 1;
85
0
    }
86
3
87
3
    // null-terminate wordlist
88
3
    wordlist->data[wordlist->size] = 0;
89
3
90
3
    if(wordlist->should_close_file) {
91
3
        fclose(file);
92
3
    }
93
3
94
3
    return 0;
95
3
}
96
97
3
void passgen_wordlist_scan(passgen_wordlist *wordlist) {
98
3
    char *state = NULL, *token;
99
3
100
3
    size_t capacity = ALLOC_INITIAL;
101
3
    wordlist->words = malloc(sizeof(char *) * capacity);
102
3
    wordlist->count = 0;
103
3
104
3
    token = strtok_r(wordlist->data, "\n", &state);
105
685
    while(token) {
106
682
        // expand array if needed
107
682
        if(capacity == wordlist->count) {
108
2
            capacity *= ALLOC_INCREASE;
109
2
            wordlist->words =
110
2
                realloc(wordlist->words, sizeof(char *) * capacity);
111
2
        }
112
682
113
682
        // save word
114
682
        wordlist->words[wordlist->count] = token;
115
682
        wordlist->count++;
116
682
117
682
        // get next word
118
682
        token = strtok_r(NULL, "\n", &state);
119
682
    }
120
3
121
3
    // resize array finally (shrink it down to what is really needed)
122
3
    wordlist->words =
123
3
        realloc(wordlist->words, sizeof(char *) * wordlist->count);
124
3
}
125
126
const char *
127
4
passgen_wordlist_random(passgen_wordlist *wordlist, passgen_random *random) {
128
4
    if(!wordlist->parsed) {
129
1
        return NULL;
130
1
    }
131
3
    size_t index = passgen_random_u64_max(random, wordlist->count);
132
3
    return wordlist->words[index];
133
3
}
134
135
4
void passgen_wordlist_free(passgen_wordlist *wordlist) {
136
4
    passgen_markov_free(&wordlist->markov);
137
4
138
4
    if(wordlist->parsed) {
139
3
        free(wordlist->words);
140
3
        free(wordlist->data);
141
3
    }
142
4
143
4
    if(wordlist->should_close_file && wordlist->file) {
144
1
        fclose(wordlist->file);
145
1
    }
146
4
}
147
148
1
size_t passgen_wordlist_count(passgen_wordlist *wordlist) {
149
1
    return wordlist->count;
150
1
}