main: fix two one-byte overreads in header_value()
[project/cgi-io.git] / multipart_parser.c
1 /* Based on node-formidable by Felix Geisendörfer
2 * Igor Afonov - afonov@gmail.com - 2012
3 * MIT License - http://www.opensource.org/licenses/mit-license.php
4 */
5
6 #include "multipart_parser.h"
7
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <string.h>
11
/*
 * Debug logger.
 *
 * When DEBUG_MULTIPART is defined, writes a "[HTTP_MULTIPART_PARSER] file:line: "
 * prefix, the printf-style message and a newline to stderr. Otherwise it does
 * nothing. Note that __FILE__/__LINE__ expand here, so the prefix identifies
 * this function, not the caller.
 */
static void multipart_log(const char * format, ...)
{
#ifdef DEBUG_MULTIPART
    va_list args;
    va_start(args, format);

    fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
    vfprintf(stderr, format, args);
    fprintf(stderr, "\n");

    /* Every va_start must be matched by va_end before returning. */
    va_end(args);
#else
    /* Logging compiled out: silence the unused-parameter warning. */
    (void) format;
#endif
}
23
/*
 * Call the optional callback settings->on_<FOR> with the parser as its only
 * argument. A non-zero result makes the enclosing function return `i`
 * immediately. Expects `p` (the parser) and `i` to be in scope at the use site.
 */
#define NOTIFY_CB(FOR)                                              \
do {                                                                \
    if (p->settings->on_##FOR && p->settings->on_##FOR(p) != 0)     \
        return i;                                                   \
} while (0)
32
/*
 * Call the optional data callback settings->on_<FOR> with the parser, a
 * pointer and a length. A non-zero result makes the enclosing function return
 * `i` immediately. Expects `p` (the parser) and `i` to be in scope at the use
 * site.
 */
#define EMIT_DATA_CB(FOR, ptr, len)                                 \
do {                                                                \
    if (p->settings->on_##FOR &&                                    \
        p->settings->on_##FOR(p, ptr, len) != 0)                    \
        return i;                                                   \
} while (0)
41
42
/* ASCII codes of the two bytes that form a CRLF line terminator. */
#define LF 0x0A /* line feed */
#define CR 0x0D /* carriage return */
45
46 struct multipart_parser {
47 void * data;
48
49 size_t index;
50 size_t boundary_length;
51
52 unsigned char state;
53
54 const multipart_parser_settings* settings;
55
56 char* lookbehind;
57 char multipart_boundary[1];
58 };
59
/* States of the multipart parser; stored in multipart_parser.state. */
enum state {
    s_uninitialized             = 1,
    s_start                     = 2,  /* reset index, then match the first boundary */
    s_start_boundary            = 3,  /* matching the opening boundary + CRLF */
    s_header_field_start        = 4,  /* first byte of a header line */
    s_header_field              = 5,  /* inside a header name */
    s_headers_almost_done       = 6,  /* CR of the blank line seen, expect LF */
    s_header_value_start        = 7,  /* skipping spaces after ':' */
    s_header_value              = 8,  /* inside a header value */
    s_header_value_almost_done  = 9,  /* CR after a value seen, expect LF */
    s_part_data_start           = 10, /* first byte of the part body */
    s_part_data                 = 11, /* inside the part body */
    s_part_data_almost_boundary = 12, /* CR seen in body, expect LF */
    s_part_data_boundary        = 13, /* CRLF seen, matching the boundary */
    s_part_data_almost_end      = 14, /* boundary matched, expect '-' or CR */
    s_part_data_end             = 15, /* CR after boundary seen, expect LF */
    s_part_data_final_hyphen    = 16, /* first '-' after boundary seen */
    s_end                       = 17  /* closing boundary consumed */
};
79
80 multipart_parser* multipart_parser_init
81 (const char *boundary, const multipart_parser_settings* settings) {
82
83 multipart_parser* p = malloc(sizeof(multipart_parser) +
84 strlen(boundary) +
85 strlen(boundary) + 9);
86
87 if (!p)
88 return NULL;
89
90 strcpy(p->multipart_boundary, boundary);
91 p->boundary_length = strlen(boundary);
92
93 p->lookbehind = (p->multipart_boundary + p->boundary_length + 1);
94
95 p->index = 0;
96 p->state = s_start;
97 p->settings = settings;
98
99 return p;
100 }
101
102 void multipart_parser_free(multipart_parser* p) {
103 free(p);
104 }
105
106 void multipart_parser_set_data(multipart_parser *p, void *data) {
107 p->data = data;
108 }
109
110 void *multipart_parser_get_data(multipart_parser *p) {
111 return p->data;
112 }
113
114 size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
115 size_t i = 0;
116 size_t mark = 0;
117 char c, cl;
118 int is_last = 0;
119
120 while(i < len) {
121 c = buf[i];
122 is_last = (i == (len - 1));
123 switch (p->state) {
124 case s_start:
125 multipart_log("s_start");
126 p->index = 0;
127 p->state = s_start_boundary;
128
129 /* fallthrough */
130 case s_start_boundary:
131 multipart_log("s_start_boundary");
132 if (p->index == p->boundary_length) {
133 if (c != CR) {
134 return i;
135 }
136 p->index++;
137 break;
138 } else if (p->index == (p->boundary_length + 1)) {
139 if (c != LF) {
140 return i;
141 }
142 p->index = 0;
143 NOTIFY_CB(part_data_begin);
144 p->state = s_header_field_start;
145 break;
146 }
147 if (c != p->multipart_boundary[p->index]) {
148 return i;
149 }
150 p->index++;
151 break;
152
153 case s_header_field_start:
154 multipart_log("s_header_field_start");
155 mark = i;
156 p->state = s_header_field;
157
158 /* fallthrough */
159 case s_header_field:
160 multipart_log("s_header_field");
161 if (c == CR) {
162 p->state = s_headers_almost_done;
163 break;
164 }
165
166 if (c == '-') {
167 break;
168 }
169
170 if (c == ':') {
171 EMIT_DATA_CB(header_field, buf + mark, i - mark);
172 p->state = s_header_value_start;
173 break;
174 }
175
176 cl = tolower(c);
177 if (cl < 'a' || cl > 'z') {
178 multipart_log("invalid character in header name");
179 return i;
180 }
181 if (is_last)
182 EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
183 break;
184
185 case s_headers_almost_done:
186 multipart_log("s_headers_almost_done");
187 if (c != LF) {
188 return i;
189 }
190
191 p->state = s_part_data_start;
192 break;
193
194 case s_header_value_start:
195 multipart_log("s_header_value_start");
196 if (c == ' ') {
197 break;
198 }
199
200 mark = i;
201 p->state = s_header_value;
202
203 /* fallthrough */
204 case s_header_value:
205 multipart_log("s_header_value");
206 if (c == CR) {
207 EMIT_DATA_CB(header_value, buf + mark, i - mark);
208 p->state = s_header_value_almost_done;
209 }
210 if (is_last)
211 EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
212 break;
213
214 case s_header_value_almost_done:
215 multipart_log("s_header_value_almost_done");
216 if (c != LF) {
217 return i;
218 }
219 p->state = s_header_field_start;
220 break;
221
222 case s_part_data_start:
223 multipart_log("s_part_data_start");
224 NOTIFY_CB(headers_complete);
225 mark = i;
226 p->state = s_part_data;
227
228 /* fallthrough */
229 case s_part_data:
230 multipart_log("s_part_data");
231 if (c == CR) {
232 EMIT_DATA_CB(part_data, buf + mark, i - mark);
233 mark = i;
234 p->state = s_part_data_almost_boundary;
235 p->lookbehind[0] = CR;
236 break;
237 }
238 if (is_last)
239 EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
240 break;
241
242 case s_part_data_almost_boundary:
243 multipart_log("s_part_data_almost_boundary");
244 if (c == LF) {
245 p->state = s_part_data_boundary;
246 p->lookbehind[1] = LF;
247 p->index = 0;
248 break;
249 }
250 EMIT_DATA_CB(part_data, p->lookbehind, 1);
251 p->state = s_part_data;
252 mark = i --;
253 break;
254
255 case s_part_data_boundary:
256 multipart_log("s_part_data_boundary");
257 if (p->multipart_boundary[p->index] != c) {
258 EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
259 p->state = s_part_data;
260 mark = i --;
261 break;
262 }
263 p->lookbehind[2 + p->index] = c;
264 if ((++ p->index) == p->boundary_length) {
265 NOTIFY_CB(part_data_end);
266 p->state = s_part_data_almost_end;
267 }
268 break;
269
270 case s_part_data_almost_end:
271 multipart_log("s_part_data_almost_end");
272 if (c == '-') {
273 p->state = s_part_data_final_hyphen;
274 break;
275 }
276 if (c == CR) {
277 p->state = s_part_data_end;
278 break;
279 }
280 return i;
281
282 case s_part_data_final_hyphen:
283 multipart_log("s_part_data_final_hyphen");
284 if (c == '-') {
285 NOTIFY_CB(body_end);
286 p->state = s_end;
287 break;
288 }
289 return i;
290
291 case s_part_data_end:
292 multipart_log("s_part_data_end");
293 if (c == LF) {
294 p->state = s_header_field_start;
295 NOTIFY_CB(part_data_begin);
296 break;
297 }
298 return i;
299
300 case s_end:
301 multipart_log("s_end: %02X", (int) c);
302 break;
303
304 default:
305 multipart_log("Multipart parser unrecoverable error");
306 return 0;
307 }
308 ++ i;
309 }
310
311 return len;
312 }