/builds/xfbs/passgen/src/tests/utf8.c
Line | Count | Source |
1 | | #include "passgen/util/utf8.h" |
2 | | #include "tests.h" |
3 | | |
4 | | // Test that decoding an empty input (input_size == 0) succeeds: returns 0 and leaves both positions at 0. |
5 | 1 | test_result test_utf8_can_decode_empty(void) { |
6 | 1 | size_t input_size = 0; |
7 | 1 | const uint8_t input[input_size + 1]; |
8 | 1 | size_t output_size = 0; |
9 | 1 | uint32_t output[output_size + 1]; |
10 | 1 | size_t input_pos = 0; |
11 | 1 | size_t output_pos = 0; |
12 | 1 | |
13 | 1 | int ret = passgen_utf8_decode( |
14 | 1 | &output[0], |
15 | 1 | output_size, |
16 | 1 | &output_pos, |
17 | 1 | NULL, |
18 | 1 | &input[0], |
19 | 1 | input_size, |
20 | 1 | &input_pos); |
21 | 1 | |
22 | 1 | assert(ret == 0); |
23 | 1 | assert(output_pos == 0); |
24 | 1 | assert(input_pos == 0); |
25 | 1 | |
26 | 1 | return test_ok; |
27 | 1 | } |
28 | | |
29 | | // Test that decoding an empty input also succeeds when a widths array is passed: returns 0, both positions stay 0. |
30 | 1 | test_result test_utf8_can_decode_empty_widths(void) { |
31 | 1 | size_t input_size = 0; |
32 | 1 | const uint8_t input[input_size + 1]; |
33 | 1 | size_t output_size = 1; |
34 | 1 | uint32_t output[output_size]; |
35 | 1 | uint8_t widths[output_size]; |
36 | 1 | size_t input_pos = 0; |
37 | 1 | size_t output_pos = 0; |
38 | 1 | |
39 | 1 | int ret = passgen_utf8_decode( |
40 | 1 | &output[0], |
41 | 1 | output_size, |
42 | 1 | &output_pos, |
43 | 1 | &widths[0], |
44 | 1 | &input[0], |
45 | 1 | input_size, |
46 | 1 | &input_pos); |
47 | 1 | |
48 | 1 | assert(ret == 0); |
49 | 1 | assert(output_pos == 0); |
50 | 1 | assert(input_pos == 0); |
51 | 1 | |
52 | 1 | return test_ok; |
53 | 1 | } |
54 | | |
55 | | // Test that a short UTF-8 sequence decodes to the expected code points when NULL is passed for the widths array. |
56 | 1 | test_result test_utf8_can_decode_simple(void) { |
57 | 1 | // UTF-8 bytes for U+00FC (ü), U+1F602 (😂), U+00B5 (µ) and a trailing newline |
58 | 1 | const uint8_t input[] = |
59 | 1 | {0xc3, 0xbc, 0xf0, 0x9f, 0x98, 0x82, 0xc2, 0xb5, 0x0a}; |
60 | 1 | size_t input_size = sizeof(input); |
61 | 1 | |
62 | 1 | size_t output_size = 100; |
63 | 1 | uint32_t output[output_size]; |
64 | 1 | |
65 | 1 | size_t input_pos = 0; |
66 | 1 | size_t output_pos = 0; |
67 | 1 | |
68 | 1 | int ret = passgen_utf8_decode( |
69 | 1 | output, |
70 | 1 | output_size, |
71 | 1 | &output_pos, |
72 | 1 | NULL, |
73 | 1 | input, |
74 | 1 | input_size, |
75 | 1 | &input_pos); |
76 | 1 | |
77 | 1 | // successful return |
78 | 1 | assert(ret == 0); |
79 | 1 | |
80 | 1 | // consumed every input byte and produced four code points |
81 | 1 | assert(input_pos == input_size); |
82 | 1 | assert(output_pos == 4); |
83 | 1 | |
84 | 1 | // decoded the expected code points |
85 | 1 | assert(output[0] == 0xFC); |
86 | 1 | assert(output[1] == 0x1F602); |
87 | 1 | assert(output[2] == 0xB5); |
88 | 1 | assert(output[3] == 0x0A); |
89 | 1 | |
90 | 1 | return test_ok; |
91 | 1 | } |
92 | | |
93 | | // Test that a short UTF-8 sequence decodes to the expected code points and that the widths array receives each one's byte width. |
94 | 1 | test_result test_utf8_can_decode_simple_widths(void) { |
95 | 1 | // UTF-8 bytes for U+00FC (ü), U+1F602 (😂), U+00B5 (µ) and a trailing newline |
96 | 1 | const uint8_t input[] = |
97 | 1 | {0xc3, 0xbc, 0xf0, 0x9f, 0x98, 0x82, 0xc2, 0xb5, 0x0a}; |
98 | 1 | size_t input_size = sizeof(input); |
99 | 1 | |
100 | 1 | size_t output_size = 100; |
101 | 1 | uint32_t output[output_size]; |
102 | 1 | uint8_t widths[output_size]; |
103 | 1 | |
104 | 1 | size_t input_pos = 0; |
105 | 1 | size_t output_pos = 0; |
106 | 1 | |
107 | 1 | int ret = passgen_utf8_decode( |
108 | 1 | output, |
109 | 1 | output_size, |
110 | 1 | &output_pos, |
111 | 1 | &widths[0], |
112 | 1 | input, |
113 | 1 | input_size, |
114 | 1 | &input_pos); |
115 | 1 | |
116 | 1 | // successful return |
117 | 1 | assert(ret == 0); |
118 | 1 | |
119 | 1 | // consumed every input byte and produced four code points |
120 | 1 | assert(input_pos == input_size); |
121 | 1 | assert(output_pos == 4); |
122 | 1 | |
123 | 1 | // each code point is correct and its width matches the number of input bytes it used |
124 | 1 | assert(output[0] == 0xFC); |
125 | 1 | assert(widths[0] == 2); |
126 | 1 | |
127 | 1 | assert(output[1] == 0x1F602); |
128 | 1 | assert(widths[1] == 4); |
129 | 1 | |
130 | 1 | assert(output[2] == 0xB5); |
131 | 1 | assert(widths[2] == 2); |
132 | 1 | |
133 | 1 | assert(output[3] == 0x0A); |
134 | 1 | assert(widths[3] == 1); |
135 | 1 | |
136 | 1 | return test_ok; |
137 | 1 | } |
138 | | |
139 | | // Test that a UTF-8 string can still be decoded when the output array is too |
140 | | // small for it, by calling the decoder again with the saved input position. |
141 | 1 | test_result test_utf8_decode_short_output(void) { |
142 | 1 | // UTF-8 bytes for U+00FC (ü), U+1F602 (😂), U+00B5 (µ) and a trailing newline |
143 | 1 | const uint8_t input[] = |
144 | 1 | {0xc3, 0xbc, 0xf0, 0x9f, 0x98, 0x82, 0xc2, 0xb5, 0x0a}; |
145 | 1 | size_t input_size = sizeof(input); |
146 | 1 | |
147 | 1 | size_t output_size = 3; |
148 | 1 | uint32_t output[output_size]; |
149 | 1 | |
150 | 1 | size_t input_pos = 0; |
151 | 1 | size_t output_pos = 0; |
152 | 1 | |
153 | 1 | int ret = passgen_utf8_decode( |
154 | 1 | output, |
155 | 1 | output_size, |
156 | 1 | &output_pos, |
157 | 1 | NULL, |
158 | 1 | input, |
159 | 1 | input_size, |
160 | 1 | &input_pos); |
161 | 1 | |
162 | 1 | // the first call fills all three output slots and stops one byte short of |
163 | 1 | // the end of the input. it should return a value greater than zero because |
164 | 1 | // there is still content to be decoded. |
165 | 1 | assert(ret > 0); |
166 | 1 | assert(input_pos == input_size - 1); |
167 | 1 | assert(output_pos == output_size); |
168 | 1 | assert(output[0] == 0xFC); |
169 | 1 | assert(output[1] == 0x1F602); |
170 | 1 | assert(output[2] == 0xB5); |
171 | 1 | |
172 | 1 | // rewind the output position so the buffer is reused; input_pos is kept |
173 | 1 | output_pos = 0; |
174 | 1 | |
175 | 1 | // continue decoding the rest of the utf-8 sequence. |
176 | 1 | ret = passgen_utf8_decode( |
177 | 1 | output, |
178 | 1 | output_size, |
179 | 1 | &output_pos, |
180 | 1 | NULL, |
181 | 1 | input, |
182 | 1 | input_size, |
183 | 1 | &input_pos); |
184 | 1 | |
185 | 1 | // on the second call, the final character (the newline) is decoded. |
186 | 1 | assert(ret == 0); |
187 | 1 | assert(input_pos == input_size); |
188 | 1 | assert(output_pos == 1); |
189 | 1 | assert(output[0] == 0x0A); |
190 | 1 | |
191 | 1 | return test_ok; |
192 | 1 | } |
193 | | |
194 | 1 | test_result test_utf8_encode_simple(void) { |
195 | 1 | // test that we can encode a short list of code points into UTF-8. |
196 | 1 | |
197 | 1 | // expected UTF-8 bytes for U+00FC (ü), U+1F602 (😂), U+00B5 (µ) and a newline |
198 | 1 | const uint8_t expected[] = |
199 | 1 | {0xc3, 0xbc, 0xf0, 0x9f, 0x98, 0x82, 0xc2, 0xb5, 0x0a}; |
200 | 1 | |
201 | 1 | const uint32_t input[] = {0xFC, 0x1F602, 0xB5, 0x0A}; |
202 | 1 | |
203 | 1 | uint8_t output[9]; |
204 | 1 | |
205 | 1 | size_t in_pos = 0; |
206 | 1 | size_t out_pos = 0; |
207 | 1 | |
208 | 1 | int ret = passgen_utf8_encode( |
209 | 1 | output, |
210 | 1 | sizeof(output) / sizeof(output[0]), |
211 | 1 | &out_pos, |
212 | 1 | input, |
213 | 1 | sizeof(input) / sizeof(input[0]), |
214 | 1 | &in_pos); |
215 | 1 | // NOTE(review): a non-zero return is asserted here, whereas the decode tests assert ret == 0 - confirm against the encode contract |
216 | 1 | assert(ret != 0); |
217 | 1 | assert(in_pos == (sizeof(input) / sizeof(input[0]))); |
218 | 1 | assert(out_pos == (sizeof(expected) / sizeof(expected[0]))); |
219 | 10 | for(size_t i = 0; i < out_pos; i++) { |
220 | 9 | assert(output[i] == expected[i]); |
221 | 9 | } |
222 | 1 | |
223 | 1 | return test_ok; |
224 | 1 | } |