LCOV - code coverage report
Current view: top level - src - wordlist.c (source / functions) Hit Total Coverage
Test: passgen-test.info Lines: 78 79 98.7 %
Date: 2024-11-29 06:05:05 Functions: 9 9 100.0 %

          Line data    Source code
       1             : #include "passgen/wordlist.h"
       2             : #include "passgen/assert.h"
       3             : #include "passgen/config.h"
       4             : #include "passgen/util/utf8.h"
       5             : #include "try.h"
       6             : #include <stdlib.h>
       7             : #include <string.h>
       8             : 
       9             : #define ALLOC_INITIAL  256
      10             : #define ALLOC_INCREASE 2
      11             : 
      12           4 : void passgen_wordlist_init(
      13             :     passgen_wordlist *wordlist,
      14             :     FILE *file,
      15             :     size_t markov_length) {
      16           4 :     wordlist->parsed = false;
      17           4 :     wordlist->parsed_markov = false;
      18           4 :     wordlist->file = file;
      19           4 :     wordlist->should_close_file = true;
      20           4 :     passgen_markov_init(&wordlist->markov, markov_length);
      21           4 : }
      22             : 
      23           3 : int passgen_wordlist_parse(passgen_wordlist *wordlist) {
      24           3 :     passgen_assert(!wordlist->parsed);
      25           3 :     wordlist->parsed = true;
      26           3 :     try(passgen_wordlist_read(wordlist, wordlist->file));
      27           3 :     wordlist->file = NULL;
      28           3 :     passgen_wordlist_scan(wordlist);
      29           3 :     return 0;
      30             : }
      31             : 
      32           3 : void passgen_wordlist_parse_markov(passgen_wordlist *wordlist) {
      33           3 :     passgen_assert(!wordlist->parsed_markov);
      34           3 :     wordlist->parsed_markov = true;
      35             : 
      36           3 :     size_t unicode_buffer_len = 256;
      37           3 :     uint32_t unicode_buffer[unicode_buffer_len];
      38         685 :     for(size_t i = 0; i < wordlist->count; i++) {
      39         682 :         uint32_t *unicode_buffer_pos = &unicode_buffer[0];
      40         682 :         const uint8_t *word = (const uint8_t *) wordlist->words[i];
      41         682 :         size_t word_length = strlen((const char *) word);
      42             : 
      43             :         // decode utf8
      44         682 :         int ret = passgen_utf8_decode(
      45             :             &unicode_buffer_pos,
      46             :             unicode_buffer_len,
      47             :             NULL,
      48             :             &word,
      49             :             word_length);
      50             : 
      51             :         // make sure the conversion worked
      52         682 :         passgen_assert(ret == PASSGEN_UTF8_SUCCESS);
      53             : 
      54             :         // add word to markov chain
      55         682 :         passgen_markov_add(
      56             :             &wordlist->markov,
      57             :             unicode_buffer,
      58         682 :             unicode_buffer_pos - &unicode_buffer[0],
      59             :             1);
      60             :     }
      61           3 : }
      62             : 
      63           3 : void passgen_wordlist_load(
      64             :     passgen_wordlist *wordlist,
      65             :     FILE *file,
      66             :     size_t markov_length) {
      67           3 :     passgen_wordlist_init(wordlist, file, markov_length);
      68           3 :     passgen_wordlist_parse(wordlist);
      69           3 :     passgen_wordlist_parse_markov(wordlist);
      70           3 : }
      71             : 
      72           3 : int passgen_wordlist_read(passgen_wordlist *wordlist, FILE *file) {
      73             :     // get size of file
      74           3 :     fseek(file, 0, SEEK_END);
      75           3 :     wordlist->size = ftell(file);
      76           3 :     fseek(file, 0, SEEK_SET);
      77             : 
      78             :     // allocate buffer to hold entire file and read in (plus zero termination)
      79           3 :     wordlist->data = malloc(wordlist->size + 1);
      80           3 :     passgen_assert(wordlist->data);
      81             : 
      82             :     // read in entire wordlist
      83           3 :     size_t read = fread(wordlist->data, 1, wordlist->size, file);
      84           3 :     if(read != wordlist->size) {
      85           0 :         return 1;
      86             :     }
      87             : 
      88             :     // null-terminate wordlist
      89           3 :     wordlist->data[wordlist->size] = 0;
      90             : 
      91           3 :     if(wordlist->should_close_file) {
      92           3 :         fclose(file);
      93             :     }
      94             : 
      95           3 :     return 0;
      96             : }
      97             : 
      98           3 : void passgen_wordlist_scan(passgen_wordlist *wordlist) {
      99           3 :     char *state = NULL, *token;
     100             : 
     101           3 :     size_t capacity = ALLOC_INITIAL;
     102           3 :     wordlist->words = malloc(sizeof(char *) * capacity);
     103           3 :     wordlist->count = 0;
     104             : 
     105           3 :     token = strtok_r(wordlist->data, "\n", &state);
     106         685 :     while(token) {
     107             :         // expand array if needed
     108         682 :         if(capacity == wordlist->count) {
     109           2 :             capacity *= ALLOC_INCREASE;
     110           2 :             wordlist->words =
     111           2 :                 realloc(wordlist->words, sizeof(char *) * capacity);
     112             :         }
     113             : 
     114             :         // save word
     115         682 :         wordlist->words[wordlist->count] = token;
     116         682 :         wordlist->count++;
     117             : 
     118             :         // get next word
     119         682 :         token = strtok_r(NULL, "\n", &state);
     120             :     }
     121             : 
     122             :     // resize array finally (shrink it down to what is really needed)
     123           3 :     wordlist->words =
     124           3 :         realloc(wordlist->words, sizeof(char *) * wordlist->count);
     125           3 : }
     126             : 
     127             : const char *
     128           4 : passgen_wordlist_random(passgen_wordlist *wordlist, passgen_random *random) {
     129           4 :     if(!wordlist->parsed) {
     130           1 :         return NULL;
     131             :     }
     132           3 :     size_t index = passgen_random_u64_max(random, wordlist->count);
     133           3 :     return wordlist->words[index];
     134             : }
     135             : 
     136           4 : void passgen_wordlist_free(passgen_wordlist *wordlist) {
     137           4 :     passgen_markov_free(&wordlist->markov);
     138             : 
     139           4 :     if(wordlist->parsed) {
     140           3 :         free(wordlist->words);
     141           3 :         free(wordlist->data);
     142             :     }
     143             : 
     144           4 :     if(wordlist->should_close_file && wordlist->file) {
     145           1 :         fclose(wordlist->file);
     146             :     }
     147             : 
     148           4 :     PASSGEN_CLEAR(wordlist);
     149           4 : }
     150             : 
     151           1 : size_t passgen_wordlist_count(passgen_wordlist *wordlist) {
     152           1 :     return wordlist->count;
     153             : }

Generated by: LCOV version 1.14