LCOV - code coverage report
Current view: top level - src - wordlist.c (source / functions) Hit Total Coverage
Test: passgen-test.info Lines: 77 78 98.7 %
Date: 2024-05-03 06:05:14 Functions: 9 9 100.0 %

          Line data    Source code
       1             : #include "passgen/wordlist.h"
       2             : #include "passgen/assert.h"
       3             : #include "passgen/util/utf8.h"
       4             : #include "try.h"
       5             : #include <stdlib.h>
       6             : #include <string.h>
       7             : 
       8             : #define ALLOC_INITIAL  256
       9             : #define ALLOC_INCREASE 2
      10             : 
      11           4 : void passgen_wordlist_init(
      12             :     passgen_wordlist *wordlist,
      13             :     FILE *file,
      14             :     size_t markov_length) {
      15           4 :     wordlist->parsed = false;
      16           4 :     wordlist->parsed_markov = false;
      17           4 :     wordlist->file = file;
      18           4 :     wordlist->should_close_file = true;
      19           4 :     passgen_markov_init(&wordlist->markov, markov_length);
      20           4 : }
      21             : 
      22           3 : int passgen_wordlist_parse(passgen_wordlist *wordlist) {
      23           3 :     passgen_assert(!wordlist->parsed);
      24           3 :     wordlist->parsed = true;
      25           3 :     try(passgen_wordlist_read(wordlist, wordlist->file));
      26           3 :     wordlist->file = NULL;
      27           3 :     passgen_wordlist_scan(wordlist);
      28           3 :     return 0;
      29             : }
      30             : 
      31           3 : void passgen_wordlist_parse_markov(passgen_wordlist *wordlist) {
      32           3 :     passgen_assert(!wordlist->parsed_markov);
      33           3 :     wordlist->parsed_markov = true;
      34             : 
      35           3 :     size_t unicode_buffer_len = 256;
      36           3 :     uint32_t unicode_buffer[unicode_buffer_len];
      37         685 :     for(size_t i = 0; i < wordlist->count; i++) {
      38         682 :         uint32_t *unicode_buffer_pos = &unicode_buffer[0];
      39         682 :         const uint8_t *word = (const uint8_t *) wordlist->words[i];
      40         682 :         size_t word_length = strlen((const char *) word);
      41             : 
      42             :         // decode utf8
      43         682 :         int ret = passgen_utf8_decode(
      44             :             &unicode_buffer_pos,
      45             :             unicode_buffer_len,
      46             :             NULL,
      47             :             &word,
      48             :             word_length);
      49             : 
      50             :         // make sure the conversion worked
      51         682 :         passgen_assert(ret == PASSGEN_UTF8_SUCCESS);
      52             : 
      53             :         // add word to markov chain
      54         682 :         passgen_markov_add(
      55             :             &wordlist->markov,
      56             :             unicode_buffer,
      57         682 :             unicode_buffer_pos - &unicode_buffer[0],
      58             :             1);
      59             :     }
      60           3 : }
      61             : 
      62           3 : void passgen_wordlist_load(
      63             :     passgen_wordlist *wordlist,
      64             :     FILE *file,
      65             :     size_t markov_length) {
      66           3 :     passgen_wordlist_init(wordlist, file, markov_length);
      67           3 :     passgen_wordlist_parse(wordlist);
      68           3 :     passgen_wordlist_parse_markov(wordlist);
      69           3 : }
      70             : 
      71           3 : int passgen_wordlist_read(passgen_wordlist *wordlist, FILE *file) {
      72             :     // get size of file
      73           3 :     fseek(file, 0, SEEK_END);
      74           3 :     wordlist->size = ftell(file);
      75           3 :     fseek(file, 0, SEEK_SET);
      76             : 
      77             :     // allocate buffer to hold entire file and read in (plus zero termination)
      78           3 :     wordlist->data = malloc(wordlist->size + 1);
      79           3 :     passgen_assert(wordlist->data);
      80             : 
      81             :     // read in entire wordlist
      82           3 :     size_t read = fread(wordlist->data, 1, wordlist->size, file);
      83           3 :     if(read != wordlist->size) {
      84           0 :         return 1;
      85             :     }
      86             : 
      87             :     // null-terminate wordlist
      88           3 :     wordlist->data[wordlist->size] = 0;
      89             : 
      90           3 :     if(wordlist->should_close_file) {
      91           3 :         fclose(file);
      92             :     }
      93             : 
      94           3 :     return 0;
      95             : }
      96             : 
      97           3 : void passgen_wordlist_scan(passgen_wordlist *wordlist) {
      98           3 :     char *state = NULL, *token;
      99             : 
     100           3 :     size_t capacity = ALLOC_INITIAL;
     101           3 :     wordlist->words = malloc(sizeof(char *) * capacity);
     102           3 :     wordlist->count = 0;
     103             : 
     104           3 :     token = strtok_r(wordlist->data, "\n", &state);
     105         685 :     while(token) {
     106             :         // expand array if needed
     107         682 :         if(capacity == wordlist->count) {
     108           2 :             capacity *= ALLOC_INCREASE;
     109           2 :             wordlist->words =
     110           2 :                 realloc(wordlist->words, sizeof(char *) * capacity);
     111             :         }
     112             : 
     113             :         // save word
     114         682 :         wordlist->words[wordlist->count] = token;
     115         682 :         wordlist->count++;
     116             : 
     117             :         // get next word
     118         682 :         token = strtok_r(NULL, "\n", &state);
     119             :     }
     120             : 
     121             :     // resize array finally (shrink it down to what is really needed)
     122           3 :     wordlist->words =
     123           3 :         realloc(wordlist->words, sizeof(char *) * wordlist->count);
     124           3 : }
     125             : 
     126             : const char *
     127           4 : passgen_wordlist_random(passgen_wordlist *wordlist, passgen_random *random) {
     128           4 :     if(!wordlist->parsed) {
     129           1 :         return NULL;
     130             :     }
     131           3 :     size_t index = passgen_random_u64_max(random, wordlist->count);
     132           3 :     return wordlist->words[index];
     133             : }
     134             : 
     135           4 : void passgen_wordlist_free(passgen_wordlist *wordlist) {
     136           4 :     passgen_markov_free(&wordlist->markov);
     137             : 
     138           4 :     if(wordlist->parsed) {
     139           3 :         free(wordlist->words);
     140           3 :         free(wordlist->data);
     141             :     }
     142             : 
     143           4 :     if(wordlist->should_close_file && wordlist->file) {
     144           1 :         fclose(wordlist->file);
     145             :     }
     146           4 : }
     147             : 
     148           1 : size_t passgen_wordlist_count(passgen_wordlist *wordlist) {
     149           1 :     return wordlist->count;
     150             : }

Generated by: LCOV version 1.14