/builds/xfbs/passgen/src/parser/token.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "passgen/parser/token.h" |
2 | | |
3 | 61 | void passgen_token_parser_init(struct passgen_token_parser *token_parser) { |
4 | 61 | token_parser->state = PASSGEN_TOKEN_INIT; |
5 | 61 | token_parser->offset = 0; |
6 | 61 | token_parser->byte_offset = 0; |
7 | 61 | } |
8 | | |
9 | 263 | const char *passgen_token_state_string(enum passgen_token_state state) { |
10 | 263 | switch(state) { |
11 | 1 | case PASSGEN_TOKEN_INIT: |
12 | 1 | return "ready for parsing"; |
13 | 208 | case PASSGEN_TOKEN_ESCAPED: |
14 | 208 | return "parsing escaped token"; |
15 | 4 | case PASSGEN_TOKEN_UNICODE: |
16 | 4 | return "parsing unicode token"; |
17 | 3 | case PASSGEN_TOKEN_UNICODE_PAYLOAD: |
18 | 3 | return "parsing unicode token payload"; |
19 | 41 | case PASSGEN_TOKEN_ERROR_UNICODE_START: |
20 | 41 | return "unexpected character while parsing unicode literal"; |
21 | 1 | case PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD: |
22 | 1 | return "unexpected character while parsing unicode literal"; |
23 | 3 | case PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN: |
24 | 3 | return "unicode literal payload too long"; |
25 | 2 | default: |
26 | 2 | return NULL; |
27 | 263 | } |
28 | 263 | } |
29 | | |
// Decode a single hexadecimal digit given as a codepoint.
//
// Returns 0..9 for '0'-'9' and 10..15 for 'A'-'F' or 'a'-'f'. Every other
// codepoint yields -1.
static inline int8_t hex_decode(uint32_t c) {
    // Unsigned subtraction wraps around for codepoints below the start of a
    // range, so one "< span" comparison checks both bounds at once.
    const uint32_t from_zero = c - '0';
    if(from_zero < 10) {
        return (int8_t) from_zero;
    }

    const uint32_t from_upper = c - 'A';
    if(from_upper < 6) {
        return (int8_t) (10 + from_upper);
    }

    const uint32_t from_lower = c - 'a';
    if(from_lower < 6) {
        return (int8_t) (10 + from_lower);
    }

    return -1;
}
45 | | |
46 | | static inline void token_parse_init( |
47 | | struct passgen_token_parser *parser, |
48 | | struct passgen_token *token, |
49 | 3.68M | uint32_t codepoint) { |
50 | 3.68M | // save position of initial token |
51 | 3.68M | token->offset = parser->offset; |
52 | 3.68M | token->byte_offset = parser->byte_offset; |
53 | 3.68M | |
54 | 3.68M | if(codepoint == '\\') { |
55 | 2.23M | parser->state = PASSGEN_TOKEN_ESCAPED; |
56 | 2.23M | } else { |
57 | 1.45M | token->codepoint = codepoint; |
58 | 1.45M | parser->state = PASSGEN_TOKEN_INIT; |
59 | 1.45M | } |
60 | 3.68M | } |
61 | | |
// Lookup table for simple one-character escapes, indexed by the character
// that follows the backslash. Slots without an initializer are zero, which
// means "no simple escape for this character".
//
// The table only reaches as far as its largest designated index, so it must
// not be indexed with large (unicode) codepoints without a bounds check.
// Lookup is O(1).
static const char simple_escaped[] = {
    // control-character escapes
    ['a'] = '\a',
    ['b'] = '\b',
    ['f'] = '\f',
    ['n'] = '\n',
    ['r'] = '\r',
    ['t'] = '\t',
    ['v'] = '\v',
    // escape character (ESC)
    ['e'] = '\033',
    // an escaped backslash is a literal backslash
    ['\\'] = '\\',
};
75 | | |
76 | | static inline void token_parse_escaped( |
77 | | struct passgen_token_parser *parser, |
78 | | struct passgen_token *token, |
79 | 2.23M | uint32_t codepoint) { |
80 | 2.23M | // simple_escaped only covers ASCII, whereas codepoint could be much |
81 | 2.23M | // larger. |
82 | 2.23M | if(codepoint < sizeof(simple_escaped) && simple_escaped[codepoint]1.11M ) { |
83 | 388 | token->codepoint = simple_escaped[codepoint]; |
84 | 388 | parser->state = PASSGEN_TOKEN_INIT; |
85 | 388 | |
86 | 388 | return; |
87 | 388 | } |
88 | 2.23M | |
89 | 2.23M | switch(codepoint) { |
90 | 1.11M | case 'u': |
91 | 1.11M | parser->state = PASSGEN_TOKEN_UNICODE; |
92 | 1.11M | break; |
93 | 1.11M | default: |
94 | 1.11M | token->codepoint = codepoint | PASSGEN_TOKEN_ESCAPED_BIT; |
95 | 1.11M | parser->state = PASSGEN_TOKEN_INIT; |
96 | 2.23M | } |
97 | 2.23M | } |
98 | | |
99 | | static inline void |
100 | 1.11M | token_parse_unicode(struct passgen_token_parser *parser, uint32_t codepoint) { |
101 | 1.11M | if(codepoint == '{') { |
102 | 1.11M | parser->state = PASSGEN_TOKEN_UNICODE_PAYLOAD; |
103 | 1.11M | parser->data.unicode_payload.length = 0; |
104 | 1.11M | parser->data.unicode_payload.codepoint = 0; |
105 | 1.11M | } else { |
106 | 51 | parser->state = PASSGEN_TOKEN_ERROR_UNICODE_START; |
107 | 51 | } |
108 | 1.11M | } |
109 | | |
110 | | static inline void token_parse_unicode_payload( |
111 | | struct passgen_token_parser *parser, |
112 | | struct passgen_token *token, |
113 | 6.68M | uint32_t codepoint) { |
114 | 6.68M | // once we read the closing brace, the payload is over and we can emit the |
115 | 6.68M | // token. |
116 | 6.68M | if(codepoint == '}') { |
117 | 1.11M | token->codepoint = parser->data.unicode_payload.codepoint; |
118 | 1.11M | parser->state = PASSGEN_TOKEN_INIT; |
119 | 1.11M | |
120 | 1.11M | return; |
121 | 1.11M | } |
122 | 5.56M | |
123 | 5.56M | // keep track of length, make sure it's not too long. |
124 | 5.56M | parser->data.unicode_payload.length++; |
125 | 5.56M | if(parser->data.unicode_payload.length > 6) { |
126 | 6 | parser->state = PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD_LEN; |
127 | 6 | |
128 | 6 | return; |
129 | 6 | } |
130 | 5.56M | |
131 | 5.56M | // try to decode the hex value. |
132 | 5.56M | int8_t decoded = hex_decode(codepoint); |
133 | 5.56M | if(decoded < 0) { |
134 | 10 | parser->state = PASSGEN_TOKEN_ERROR_UNICODE_PAYLOAD; |
135 | 10 | |
136 | 10 | return; |
137 | 10 | } |
138 | 5.56M | |
139 | 5.56M | parser->data.unicode_payload.codepoint *= 16; |
140 | 5.56M | parser->data.unicode_payload.codepoint += decoded; |
141 | 5.56M | } |
142 | | |
143 | | int passgen_token_parse( |
144 | | struct passgen_token_parser *parser, |
145 | | struct passgen_token *token, |
146 | | uint8_t width, |
147 | 13.7M | uint32_t codepoint) { |
148 | 13.7M | switch(parser->state) { |
149 | 3.68M | case PASSGEN_TOKEN_INIT: |
150 | 3.68M | token_parse_init(parser, token, codepoint); |
151 | 3.68M | break; |
152 | 2.23M | case PASSGEN_TOKEN_ESCAPED: |
153 | 2.23M | token_parse_escaped(parser, token, codepoint); |
154 | 2.23M | break; |
155 | 1.11M | case PASSGEN_TOKEN_UNICODE: |
156 | 1.11M | token_parse_unicode(parser, codepoint); |
157 | 1.11M | break; |
158 | 6.68M | case PASSGEN_TOKEN_UNICODE_PAYLOAD: |
159 | 6.68M | token_parse_unicode_payload(parser, token, codepoint); |
160 | 6.68M | break; |
161 | 3 | default: |
162 | 3 | return parser->state; |
163 | 13.7M | } |
164 | 13.7M | |
165 | 13.7M | // update parser offsets |
166 | 13.7M | parser->offset += 1; |
167 | 13.7M | parser->byte_offset += width; |
168 | 13.7M | |
169 | 13.7M | return parser->state; |
170 | 13.7M | } |
171 | | |
172 | 254 | const char *passgen_token_parse_error_str(int ret) { |
173 | 254 | return passgen_token_state_string(ret); |
174 | 254 | } |