Line data Source code
1 : #include "passgen/markov.h"
2 : #include "tests.h"
3 : #include <stdlib.h>
4 :
5 : #define SEED 234720984723
6 :
7 : #define passgen_markov_leaf_count_raw(leaf, codepoint) \
8 : leaf->entries[leaf->capacity].count[codepoint % leaf->capacity]
9 :
10 : #define passgen_markov_leaf_codepoint_raw(leaf, index) \
11 : leaf->entries[0].codepoint[index % leaf->capacity]
12 :
13 : #define passgen_random_unicode(rand) (passgen_random_u32(rand) & 0x10FFFF)
14 :
15 1 : test_result test_markov_node_layout(void) {
16 1 : passgen_markov_node *node = passgen_markov_node_new(0);
17 :
18 1 : assert_eq(node->capacity, 3);
19 :
20 : assert_eq(
21 : &passgen_markov_node_codepoint(node, 0),
22 : ((void *) node) + sizeof(size_t));
23 1 : assert_eq(
24 : &passgen_markov_node_codepoint(node, 1),
25 : ((void *) node) + sizeof(size_t) + 1 * sizeof(uint32_t));
26 1 : assert_eq(
27 : &passgen_markov_node_codepoint(node, 2),
28 : ((void *) node) + sizeof(size_t) + 2 * sizeof(uint32_t));
29 :
30 1 : assert_eq(
31 : &passgen_markov_node_child(node, 0),
32 : ((void *) node) + sizeof(size_t) + 4 * sizeof(uint32_t));
33 1 : assert_eq(
34 : &passgen_markov_node_child(node, 1),
35 : ((void *) node) + 2 * sizeof(size_t) + 4 * sizeof(uint32_t));
36 1 : assert_eq(
37 : &passgen_markov_node_child(node, 2),
38 : ((void *) node) + 3 * sizeof(size_t) + 4 * sizeof(uint32_t));
39 :
40 1 : free(node);
41 :
42 1 : return test_ok;
43 : }
44 :
45 1 : test_result test_markov_leaf_layout(void) {
46 1 : passgen_markov_leaf *leaf = passgen_markov_leaf_new(0);
47 :
48 1 : assert_eq(leaf->capacity, 3);
49 :
50 : assert_eq(
51 : &passgen_markov_leaf_codepoint_raw(leaf, 0),
52 : ((void *) leaf) + 2 * sizeof(size_t));
53 1 : assert_eq(
54 : &passgen_markov_leaf_codepoint_raw(leaf, 1),
55 : ((void *) leaf) + 2 * sizeof(size_t) + 1 * sizeof(uint32_t));
56 1 : assert_eq(
57 : &passgen_markov_leaf_codepoint_raw(leaf, 2),
58 : ((void *) leaf) + 2 * sizeof(size_t) + 2 * sizeof(uint32_t));
59 :
60 1 : assert_eq(
61 : &passgen_markov_leaf_count_raw(leaf, 0),
62 : ((void *) leaf) + 2 * sizeof(size_t) + 3 * sizeof(uint32_t));
63 1 : assert_eq(
64 : &passgen_markov_leaf_count_raw(leaf, 1),
65 : ((void *) leaf) + 2 * sizeof(size_t) + 4 * sizeof(uint32_t));
66 1 : assert_eq(
67 : &passgen_markov_leaf_count_raw(leaf, 2),
68 : ((void *) leaf) + 2 * sizeof(size_t) + 5 * sizeof(uint32_t));
69 :
70 1 : free(leaf);
71 :
72 1 : return test_ok;
73 : }
74 :
75 1 : test_result test_markov_node_insert(void) {
76 1 : passgen_markov_node *node = passgen_markov_node_new(0);
77 1 : assert_eq(node->capacity, 3);
78 :
79 1 : node = passgen_markov_node_insert(node, 0);
80 1 : assert(!passgen_markov_node_child(node, 0).node);
81 1 : passgen_markov_node_child(node, 0).node = (passgen_markov_node *) &node;
82 :
83 1 : node = passgen_markov_node_insert(node, 1);
84 1 : assert(!passgen_markov_node_child(node, 1).node);
85 1 : passgen_markov_node_child(node, 1).node = (passgen_markov_node *) &node;
86 :
87 1 : node = passgen_markov_node_insert(node, 2);
88 1 : assert(!passgen_markov_node_child(node, 2).node);
89 1 : passgen_markov_node_child(node, 2).node = (passgen_markov_node *) &node;
90 :
91 1 : assert_eq(node->capacity, 3);
92 :
93 1 : node = passgen_markov_node_insert(node, 3);
94 1 : assert(!passgen_markov_node_child(node, 3).node);
95 1 : passgen_markov_node_child(node, 3).node = (passgen_markov_node *) &node;
96 :
97 1 : assert_eq(node->capacity, 7);
98 :
99 1 : node = passgen_markov_node_insert(node, 4);
100 1 : assert(!passgen_markov_node_child(node, 4).node);
101 1 : passgen_markov_node_child(node, 4).node = (passgen_markov_node *) &node;
102 :
103 1 : node = passgen_markov_node_insert(node, 5);
104 1 : assert(!passgen_markov_node_child(node, 5).node);
105 1 : passgen_markov_node_child(node, 5).node = (passgen_markov_node *) &node;
106 :
107 1 : node = passgen_markov_node_insert(node, 6);
108 1 : assert(!passgen_markov_node_child(node, 6).node);
109 1 : passgen_markov_node_child(node, 6).node = (passgen_markov_node *) &node;
110 :
111 1 : node = passgen_markov_node_insert(node, 7);
112 1 : assert(!passgen_markov_node_child(node, 7).node);
113 1 : passgen_markov_node_child(node, 7).node = (passgen_markov_node *) &node;
114 :
115 1 : assert_eq(node->capacity, 17);
116 :
117 993 : for(size_t i = 8; i < 1000; i++) {
118 992 : node = passgen_markov_node_insert(node, i);
119 992 : assert(!passgen_markov_node_child(node, i).node);
120 992 : passgen_markov_node_child(node, i).node = (passgen_markov_node *) &node;
121 : }
122 :
123 1 : assert_eq(node->capacity, 1361);
124 :
125 1 : free(node);
126 :
127 1 : return test_ok;
128 : }
129 :
130 1 : test_result test_markov_leaf_insert(void) {
131 1 : passgen_markov_leaf *leaf = passgen_markov_leaf_new(0);
132 1 : assert_eq(leaf->capacity, 3);
133 :
134 1 : leaf = passgen_markov_leaf_insert(leaf, 0, 1);
135 1 : assert(passgen_markov_leaf_count_raw(leaf, 0) == 1);
136 :
137 1 : leaf = passgen_markov_leaf_insert(leaf, 0, 1);
138 1 : assert(passgen_markov_leaf_count_raw(leaf, 0) == 2);
139 :
140 1 : leaf = passgen_markov_leaf_insert(leaf, 1, 3);
141 1 : assert(passgen_markov_leaf_count_raw(leaf, 1) == 3);
142 :
143 1 : leaf = passgen_markov_leaf_insert(leaf, 2, 7);
144 1 : assert(passgen_markov_leaf_count_raw(leaf, 2) == 7);
145 :
146 1 : assert_eq(leaf->capacity, 3);
147 :
148 1 : leaf = passgen_markov_leaf_insert(leaf, 3, 2);
149 1 : assert(passgen_markov_leaf_count_raw(leaf, 3) == 2);
150 :
151 1 : assert_eq(leaf->capacity, 7);
152 :
153 1 : leaf = passgen_markov_leaf_insert(leaf, 4, 4);
154 1 : assert(passgen_markov_leaf_count_raw(leaf, 4) == 4);
155 :
156 1 : leaf = passgen_markov_leaf_insert(leaf, 5, 6);
157 1 : assert(passgen_markov_leaf_count_raw(leaf, 5) == 6);
158 :
159 1 : leaf = passgen_markov_leaf_insert(leaf, 6, 8);
160 1 : assert(passgen_markov_leaf_count_raw(leaf, 6) == 8);
161 :
162 1 : leaf = passgen_markov_leaf_insert(leaf, 7, 10);
163 1 : assert(passgen_markov_leaf_count_raw(leaf, 7) == 10);
164 :
165 1 : assert_eq(leaf->capacity, 17);
166 :
167 993 : for(size_t i = 8; i < 1000; i++) {
168 992 : leaf = passgen_markov_leaf_insert(leaf, i, i);
169 992 : assert(passgen_markov_leaf_count_raw(leaf, i) == i);
170 : }
171 :
172 1 : assert_eq(leaf->capacity, 1361);
173 :
174 1 : free(leaf);
175 :
176 1 : return test_ok;
177 : }
178 :
179 1 : test_result test_markov_node_insert_word(void) {
180 1 : passgen_markov_node *root_node = passgen_markov_node_new(0);
181 :
182 1 : const uint32_t word[] = {'h', 'e', 'l', 'l', 'o'};
183 :
184 : root_node =
185 1 : passgen_markov_node_insert_word(root_node, (void *) &word, 5, 1);
186 1 : passgen_markov_node *node = root_node;
187 :
188 1 : assert_eq(passgen_markov_node_codepoint(node, 'h'), 'h');
189 1 : node = passgen_markov_node_child(node, 'h').node;
190 1 : assert(node);
191 :
192 1 : assert_eq(passgen_markov_node_codepoint(node, 'e'), 'e');
193 1 : node = passgen_markov_node_child(node, 'e').node;
194 1 : assert(node);
195 :
196 1 : assert_eq(passgen_markov_node_codepoint(node, 'l'), 'l');
197 1 : node = passgen_markov_node_child(node, 'l').node;
198 1 : assert(node);
199 :
200 1 : assert_eq(passgen_markov_node_codepoint(node, 'l'), 'l');
201 1 : passgen_markov_leaf *leaf = passgen_markov_node_child(node, 'l').leaf;
202 1 : assert(leaf);
203 1 : assert_eq(leaf->capacity, 3);
204 1 : assert_eq(leaf->total_count, 1);
205 :
206 1 : assert_eq(passgen_markov_leaf_codepoint_raw(leaf, 'o'), 'o');
207 1 : assert_eq(passgen_markov_leaf_count_raw(leaf, 'o'), 1);
208 :
209 1 : passgen_markov_node_free(root_node, 4);
210 :
211 1 : return test_ok;
212 : }
213 :
214 1 : test_result test_markov_add(void) {
215 : passgen_markov markov;
216 : passgen_markov_node *node;
217 : passgen_markov_leaf *leaf;
218 :
219 : // markov chain level 2, meaning two prefix chars
220 1 : passgen_markov_init(&markov, 2);
221 :
222 : // this should have added 00a and 0a0 into the chain.
223 1 : passgen_markov_add(&markov, (void *) &(const uint32_t[]){'a'}, 1, 1);
224 :
225 : // verify 00a is there
226 1 : node = passgen_markov_node_child(markov.root, 0).node;
227 1 : assert(node);
228 1 : leaf = passgen_markov_node_child(node, 0).leaf;
229 1 : assert(leaf);
230 1 : assert_eq(leaf->total_count, 1);
231 1 : assert_eq(passgen_markov_leaf_codepoint_raw(leaf, 'a'), 'a');
232 1 : assert_eq(passgen_markov_leaf_count_raw(leaf, 'a'), 1);
233 :
234 : // verify 0a0 is there
235 1 : node = passgen_markov_node_child(markov.root, 0).node;
236 1 : assert(node);
237 1 : leaf = passgen_markov_node_child(node, 'a').leaf;
238 1 : assert(leaf);
239 1 : assert_eq(leaf->total_count, 1);
240 1 : assert_eq(passgen_markov_leaf_codepoint_raw(leaf, 0), 0);
241 1 : assert_eq(passgen_markov_leaf_count_raw(leaf, 0), 1);
242 :
243 : // this should have added 00l, 0la, la0, each with a weight of 2.
244 1 : passgen_markov_add(&markov, (void *) &(const uint32_t[]){'l', 'a'}, 2, 2);
245 :
246 : // verify 00l is there
247 1 : node = passgen_markov_node_child(markov.root, 0).node;
248 1 : assert(node);
249 1 : leaf = passgen_markov_node_child(node, 0).leaf;
250 1 : assert(leaf);
251 1 : assert_eq(leaf->total_count, 3);
252 1 : assert_eq(passgen_markov_leaf_codepoint_raw(leaf, 'l'), 'l');
253 1 : assert_eq(passgen_markov_leaf_count_raw(leaf, 'l'), 2);
254 :
255 : // verify 0la is there
256 1 : node = passgen_markov_node_child(markov.root, 0).node;
257 1 : assert(node);
258 1 : leaf = passgen_markov_node_child(node, 'l').leaf;
259 1 : assert(leaf);
260 1 : assert_eq(leaf->total_count, 2);
261 1 : assert_eq(passgen_markov_leaf_codepoint_raw(leaf, 'a'), 'a');
262 1 : assert_eq(passgen_markov_leaf_count_raw(leaf, 'a'), 2);
263 :
264 : // verify la0 is there
265 1 : node = passgen_markov_node_child(markov.root, 'l').node;
266 1 : assert(node);
267 1 : leaf = passgen_markov_node_child(node, 'a').leaf;
268 1 : assert(leaf);
269 1 : assert_eq(leaf->total_count, 2);
270 1 : assert_eq(passgen_markov_leaf_codepoint_raw(leaf, 0), 0);
271 1 : assert_eq(passgen_markov_leaf_count_raw(leaf, 0), 2);
272 :
273 1 : passgen_markov_add(&markov, (void *) &(const uint32_t[]){'l', 'e'}, 2, 1);
274 1 : passgen_markov_add(
275 : &markov,
276 1 : (void *) &(const uint32_t[]){'t', 'h', 'e'},
277 : 3,
278 : 1);
279 1 : passgen_markov_add(
280 : &markov,
281 1 : (void *) &(const uint32_t[]){'p', 'a', 'r', 't'},
282 : 4,
283 : 1);
284 1 : passgen_markov_add(
285 : &markov,
286 1 : (void *) &(const uint32_t[]){'p', 'h', 'o', 'n', 'e'},
287 : 5,
288 : 1);
289 :
290 1 : passgen_markov_free(&markov);
291 :
292 1 : return test_ok;
293 : }
294 :
295 1 : test_result test_markov_add_random(void) {
296 : passgen_markov markov;
297 : passgen_random rand;
298 1 : passgen_random_xorshift_open(&rand, SEED);
299 1 : uint32_t word[12] = {0};
300 1 : size_t word_count = 200;
301 :
302 : // try this for varying markov chain lengths
303 8 : for(size_t length = 3; length < 10; length++) {
304 7 : passgen_markov_init(&markov, length);
305 :
306 : // insert words
307 1407 : for(size_t count = 0; count < word_count; count++) {
308 : // generate random word
309 18200 : for(size_t offset = 0; offset < 12; offset++) {
310 : // generate only non-null, valid unicode codepoints.
311 : do {
312 16800 : word[offset] = passgen_random_unicode(&rand);
313 16800 : } while(word[offset] == 0);
314 : }
315 :
316 1400 : passgen_markov_add(&markov, &word[0], 12, 1);
317 : }
318 :
319 7 : passgen_markov_free(&markov);
320 : }
321 :
322 1 : passgen_random_close(&rand);
323 :
324 1 : return test_ok;
325 : }
326 :
327 1 : test_result test_markov_generate_random(void) {
328 : passgen_markov markov;
329 : passgen_random rand;
330 1 : passgen_random_xorshift_open(&rand, SEED);
331 1 : uint32_t word[12] = {0};
332 1 : size_t word_count = 1000;
333 1 : size_t length = 3;
334 :
335 1 : passgen_markov_init(&markov, length);
336 :
337 : // insert words
338 1001 : for(size_t count = 0; count < word_count; count++) {
339 : // generate random word
340 13000 : for(size_t offset = 0; offset < 12; offset++) {
341 : do {
342 12000 : word[offset] = passgen_random_unicode(&rand);
343 12000 : } while(word[offset] == 0);
344 : }
345 :
346 1000 : passgen_markov_add(&markov, &word[0], 12, 1);
347 : }
348 :
349 : // generate word_count words
350 1001 : for(size_t count = 0; count < word_count; count++) {
351 : // clear word
352 4000 : for(int i = 0; i < length; i++) {
353 3000 : word[i] = 0;
354 : }
355 :
356 : uint32_t next;
357 : do {
358 : // generate next codepoint
359 13000 : next = passgen_markov_generate(&markov, &word[0], &rand, NULL);
360 :
361 : // shift characters
362 39000 : for(int i = 1; i < length; i++) {
363 26000 : word[i - 1] = word[i];
364 : }
365 :
366 : // save next
367 13000 : word[length - 1] = next;
368 13000 : } while(next);
369 : }
370 :
371 1 : passgen_markov_free(&markov);
372 1 : passgen_random_close(&rand);
373 :
374 1 : return test_ok;
375 : }
376 :
377 1 : test_result test_markov_leaf_count_random(void) {
378 : passgen_random rand;
379 1 : passgen_random_xorshift_open(&rand, SEED);
380 1 : passgen_markov_leaf *leaf = passgen_markov_leaf_new(0);
381 :
382 10001 : for(size_t count = 0; count < 10000; count++) {
383 10000 : uint32_t codepoint = passgen_random_unicode(&rand);
384 10000 : uint32_t before = passgen_markov_leaf_count(leaf, codepoint);
385 10000 : assert_eq(before, 0);
386 : }
387 :
388 1 : passgen_markov_leaf_free(leaf);
389 1 : passgen_random_close(&rand);
390 :
391 1 : return test_ok;
392 : }
393 :
394 1 : test_result test_markov_leaf_insert_random(void) {
395 : passgen_random rand;
396 1 : passgen_random_xorshift_open(&rand, SEED);
397 1 : passgen_markov_leaf *leaf = passgen_markov_leaf_new(0);
398 :
399 1001 : for(size_t count = 0; count < 1000; count++) {
400 1000 : uint32_t codepoint = passgen_random_unicode(&rand);
401 1000 : uint32_t weight = passgen_random_u16(&rand);
402 1000 : uint32_t before = passgen_markov_leaf_count(leaf, codepoint);
403 1000 : leaf = passgen_markov_leaf_insert(leaf, codepoint, weight);
404 1000 : uint32_t after = passgen_markov_leaf_count(leaf, codepoint);
405 1000 : assert_eq(after - before, weight);
406 : }
407 :
408 1 : passgen_markov_leaf_free(leaf);
409 1 : passgen_random_close(&rand);
410 :
411 1 : return test_ok;
412 : }
413 :
414 1 : test_result test_markov_leaf_insert_sequential(void) {
415 : passgen_random rand;
416 1 : passgen_random_xorshift_open(&rand, SEED);
417 1 : passgen_markov_leaf *leaf = passgen_markov_leaf_new(0);
418 :
419 100001 : for(size_t count = 0; count < 100000; count++) {
420 100000 : uint32_t codepoint = count;
421 100000 : uint32_t weight = passgen_random_u16(&rand);
422 100000 : uint32_t before = passgen_markov_leaf_count(leaf, codepoint);
423 100000 : leaf = passgen_markov_leaf_insert(leaf, codepoint, weight);
424 100000 : uint32_t after = passgen_markov_leaf_count(leaf, codepoint);
425 100000 : assert_eq(after - before, weight);
426 : }
427 :
428 1 : passgen_markov_leaf_free(leaf);
429 1 : passgen_random_close(&rand);
430 :
431 1 : return test_ok;
432 : }
433 :
434 1 : test_result test_markov_generate(void) {
435 1 : passgen_random *rand = passgen_random_xorshift_open(NULL, SEED);
436 : // passgen_random_xorshift_open(&rand, SEED);
437 :
438 : passgen_markov markov;
439 1 : passgen_markov_init(&markov, 2);
440 :
441 1 : passgen_markov_add(&markov, &(const uint32_t[]){'a', 'b'}[0], 2, 1);
442 1 : passgen_markov_add(&markov, &(const uint32_t[]){'b'}[0], 1, 1);
443 1 : passgen_markov_add(
444 : &markov,
445 1 : &(const uint32_t[]){'c', 'd', 'e', 'f'}[0],
446 : 4,
447 : 1);
448 :
449 : /*
450 : uint32_t next, word[32];
451 : memset(word, 0, sizeof(word));
452 :
453 : word[2] = passgen_markov_generate(&markov, &word[0], rand);
454 : assert(word[2] == 'c');
455 : word[3] = passgen_markov_generate(&markov, &word[1], rand);
456 : assert(word[3] == 'd');
457 : word[4] = passgen_markov_generate(&markov, &word[2], rand);
458 : assert(word[4] == 'e');
459 : word[5] = passgen_markov_generate(&markov, &word[3], rand);
460 : assert(word[5] == 'f');
461 : word[6] = passgen_markov_generate(&markov, &word[4], rand);
462 : assert(word[6] == 0);
463 :
464 : word[2] = passgen_markov_generate(&markov, &word[0], rand);
465 : assert(word[2] == 'b');
466 : word[3] = passgen_markov_generate(&markov, &word[1], rand);
467 : assert(word[3] == 0);
468 : */
469 :
470 1 : passgen_markov_free(&markov);
471 1 : passgen_random_free(rand);
472 :
473 1 : return test_ok;
474 : }
|