uclient: fix http regression
[project/uclient.git] / uclient-fetch.c
1 /*
2 * uclient - ustream based protocol client library
3 *
4 * Copyright (C) 2014 Felix Fietkau <nbd@openwrt.org>
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
#define _GNU_SOURCE
#include <sys/stat.h>
#include <sys/socket.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <dlfcn.h>
#include <getopt.h>
#include <fcntl.h>
#include <glob.h>
#include <stdint.h>
#include <inttypes.h>
#include <signal.h>

#include <libubox/blobmsg.h>

#include "progress.h"
#include "uclient.h"
#include "uclient-utils.h"
37
/*
 * Shared-library file extension; appended to "libustream-ssl." to form
 * the name handed to dlopen() in init_ustream_ssl().
 */
#ifdef __APPLE__
#define LIB_EXT "dylib"
#else
#define LIB_EXT "so"
#endif

/*
 * Fallback used only when the C library does not already define
 * strdupa(): copies x into alloca()-allocated storage.
 */
#ifndef strdupa
#define strdupa(x) strcpy(alloca(strlen(x)+1),x)
#endif
47
/* Value sent as the User-Agent request header (-U / --user-agent). */
static const char *user_agent = "uclient-fetch";
/* --post-data / --post-file arguments; either one switches the request to POST. */
static const char *post_data;
static const char *post_file;
/* SSL context and operations table obtained in init_ustream_ssl(); NULL when unavailable. */
static struct ustream_ssl_ctx *ssl_ctx;
static const struct ustream_ssl_ops *ssl_ops;
/* Non-zero suppresses status messages and the progress meter (-q / --quiet). */
static int quiet = false;
/* Cleared by --no-check-certificate. */
static bool verify = true;
/* When false, get_proxy_url() never consults the *_proxy environment variables. */
static bool proxy = true;
/* Set in main() when verification is on and no --ca-certificate was given. */
static bool default_certs = false;
/* Spider mode (-s / --spider): response data is read but not written anywhere. */
static bool no_output;
/* Argument of -O, or NULL to derive the file name from the URL. */
static const char *opt_output_file;
/* Descriptor the response body is written to, or -1 while none is open. */
static int output_fd = -1;
/* Value returned from main(). */
static int error_ret;
/* Counters handed to progress_update(): resume offset, bytes so far, Content-Length. */
static off_t out_offset;
static off_t out_bytes;
static off_t out_len;
/* Credentials passed to the uclient library: "user:password" or just the user name. */
static char *auth_str;
/* URLs still to be fetched after the one passed to uclient_new(), and how many. */
static char **urls;
static int n_urls;
/* -T / --timeout value in seconds; 0 leaves the library default untouched. */
static int timeout;
/* resume: -c was given; cur_resume: the request currently in flight tries to resume. */
static bool resume, cur_resume;

/* Progress meter state and the timer that refreshes it. */
static struct progress pmt;
static struct uloop_timeout pmt_timer;

/* Defined further down; needed earlier by header_done_cb(). */
static int init_request(struct uclient *cl);
static void request_done(struct uclient *cl);
75
76 static void pmt_update(struct uloop_timeout *t)
77 {
78 progress_update(&pmt, out_offset, out_bytes, out_len);
79 uloop_timeout_set(t, 1000);
80 }
81
82 static const char *
83 get_proxy_url(char *url)
84 {
85 char prefix[16];
86 char *sep;
87
88 if (!proxy)
89 return NULL;
90
91 sep = strchr(url, ':');
92 if (!sep)
93 return NULL;
94
95 if (sep - url > 5)
96 return NULL;
97
98 memcpy(prefix, url, sep - url);
99 strcpy(prefix + (sep - url), "_proxy");
100 return getenv(prefix);
101 }
102
103 static int open_output_file(const char *path, uint64_t resume_offset)
104 {
105 const char *output_file = opt_output_file;
106 char *filename = NULL;
107 int flags;
108 int ret;
109
110 if (cur_resume)
111 flags = O_RDWR;
112 else
113 flags = O_WRONLY | O_TRUNC;
114
115 if (!cur_resume && !output_file)
116 flags |= O_EXCL;
117
118 flags |= O_CREAT;
119
120 if (output_file) {
121 if (!strcmp(output_file, "-")) {
122 if (!quiet)
123 fprintf(stderr, "Writing to stdout\n");
124
125 ret = STDOUT_FILENO;
126 goto done;
127 }
128 } else {
129 filename = uclient_get_url_filename(path, "index.html");
130 if (!filename) {
131 ret = -ENOMEM;
132 goto out;
133 }
134
135 output_file = filename;
136 }
137
138 if (!quiet)
139 fprintf(stderr, "Writing to '%s'\n", output_file);
140 ret = open(output_file, flags, 0644);
141 if (ret < 0)
142 goto free;
143
144 if (resume_offset &&
145 lseek(ret, resume_offset, SEEK_SET) < 0) {
146 if (!quiet)
147 fprintf(stderr, "Failed to seek %"PRIu64" bytes in output file\n", resume_offset);
148 close(ret);
149 ret = -1;
150 goto free;
151 }
152
153 out_offset = resume_offset;
154 out_bytes += resume_offset;
155 done:
156 if (!quiet) {
157 progress_init(&pmt, output_file);
158 pmt_timer.cb = pmt_update;
159 pmt_timer.cb(&pmt_timer);
160 }
161
162 free:
163 free(filename);
164 out:
165 return ret;
166 }
167
168 static void header_done_cb(struct uclient *cl)
169 {
170 enum {
171 H_RANGE,
172 H_LEN,
173 __H_MAX
174 };
175 static const struct blobmsg_policy policy[__H_MAX] = {
176 [H_RANGE] = { .name = "content-range", .type = BLOBMSG_TYPE_STRING },
177 [H_LEN] = { .name = "content-length", .type = BLOBMSG_TYPE_STRING },
178 };
179 struct blob_attr *tb[__H_MAX];
180 uint64_t resume_offset = 0, resume_end, resume_size;
181 static int retries;
182
183 if (retries < 10) {
184 int ret = uclient_http_redirect(cl);
185 if (ret < 0) {
186 if (!quiet)
187 fprintf(stderr, "Failed to redirect to %s on %s\n", cl->url->location, cl->url->host);
188 error_ret = 8;
189 request_done(cl);
190 return;
191 }
192 if (ret > 0) {
193 if (!quiet)
194 fprintf(stderr, "Redirected to %s on %s\n", cl->url->location, cl->url->host);
195
196 retries++;
197 return;
198 }
199 }
200
201 if (cl->status_code == 204 && cur_resume) {
202 /* Resume attempt failed, try normal download */
203 cur_resume = false;
204 init_request(cl);
205 return;
206 }
207
208 blobmsg_parse(policy, __H_MAX, tb, blob_data(cl->meta), blob_len(cl->meta));
209
210 switch (cl->status_code) {
211 case 416:
212 if (!quiet)
213 fprintf(stderr, "File download already fully retrieved; nothing to do.\n");
214 request_done(cl);
215 break;
216 case 206:
217 if (!cur_resume) {
218 if (!quiet)
219 fprintf(stderr, "Error: Partial content received, full content requested\n");
220 error_ret = 8;
221 request_done(cl);
222 break;
223 }
224
225 if (!tb[H_RANGE]) {
226 if (!quiet)
227 fprintf(stderr, "Content-Range header is missing\n");
228 error_ret = 8;
229 break;
230 }
231
232 if (sscanf(blobmsg_get_string(tb[H_RANGE]),
233 "bytes %"PRIu64"-%"PRIu64"/%"PRIu64,
234 &resume_offset, &resume_end, &resume_size) != 3) {
235 if (!quiet)
236 fprintf(stderr, "Content-Range header is invalid\n");
237 error_ret = 8;
238 break;
239 }
240 /* fall through */
241 case 204:
242 case 200:
243 if (no_output)
244 break;
245
246 if (tb[H_LEN])
247 out_len = strtoul(blobmsg_get_string(tb[H_LEN]), NULL, 10);
248
249 output_fd = open_output_file(cl->url->location, resume_offset);
250 if (output_fd < 0) {
251 if (!quiet)
252 perror("Cannot open output file");
253 error_ret = 3;
254 request_done(cl);
255 }
256 break;
257
258 default:
259 if (!quiet)
260 fprintf(stderr, "HTTP error %d\n", cl->status_code);
261 request_done(cl);
262 error_ret = 8;
263 break;
264 }
265 }
266
267 static void read_data_cb(struct uclient *cl)
268 {
269 char buf[256];
270 ssize_t n;
271 int len;
272
273 if (!no_output && output_fd < 0)
274 return;
275
276 while (1) {
277 len = uclient_read(cl, buf, sizeof(buf));
278 if (len <= 0)
279 return;
280
281 out_bytes += len;
282 if (!no_output) {
283 n = write(output_fd, buf, len);
284 if (n < 0)
285 return;
286 }
287 }
288 }
289
290 static void msg_connecting(struct uclient *cl)
291 {
292 char addr[INET6_ADDRSTRLEN];
293 int port;
294
295 if (quiet)
296 return;
297
298 uclient_get_addr(addr, &port, &cl->remote_addr);
299 fprintf(stderr, "Connecting to %s:%d\n", addr, port);
300 }
301
302 static void check_resume_offset(struct uclient *cl)
303 {
304 char range_str[64];
305 struct stat st;
306 char *file;
307 int ret;
308
309 file = uclient_get_url_filename(cl->url->location, "index.html");
310 if (!file)
311 return;
312
313 ret = stat(file, &st);
314 free(file);
315 if (ret)
316 return;
317
318 if (!st.st_size)
319 return;
320
321 snprintf(range_str, sizeof(range_str), "bytes=%"PRIu64"-", (uint64_t) st.st_size);
322 uclient_http_set_header(cl, "Range", range_str);
323 }
324
325 static int init_request(struct uclient *cl)
326 {
327 int rc;
328
329 out_offset = 0;
330 out_bytes = 0;
331 out_len = 0;
332 uclient_http_set_ssl_ctx(cl, ssl_ops, ssl_ctx, verify);
333
334 if (timeout)
335 cl->timeout_msecs = timeout * 1000;
336
337 rc = uclient_connect(cl);
338 if (rc)
339 return rc;
340
341 msg_connecting(cl);
342
343 rc = uclient_http_set_request_type(cl, post_data || post_file ? "POST" : "GET");
344 if (rc)
345 return rc;
346
347 uclient_http_reset_headers(cl);
348 uclient_http_set_header(cl, "User-Agent", user_agent);
349 if (cur_resume)
350 check_resume_offset(cl);
351
352 if (post_data) {
353 uclient_http_set_header(cl, "Content-Type", "application/x-www-form-urlencoded");
354 uclient_write(cl, post_data, strlen(post_data));
355 }
356 else if(post_file)
357 {
358 FILE *input_file;
359 uclient_http_set_header(cl, "Content-Type", "application/x-www-form-urlencoded");
360
361 input_file = fopen(post_file, "r");
362 if (!input_file)
363 return errno;
364
365 char tbuf[1024];
366 size_t rlen = 0;
367 do
368 {
369 rlen = fread(tbuf, 1, sizeof(tbuf), input_file);
370 uclient_write(cl, tbuf, rlen);
371 }
372 while(rlen);
373
374 fclose(input_file);
375 }
376
377 rc = uclient_request(cl);
378 if (rc)
379 return rc;
380
381 return 0;
382 }
383
384 static void request_done(struct uclient *cl)
385 {
386 const char *proxy_url;
387
388 if (n_urls) {
389 proxy_url = get_proxy_url(*urls);
390 if (proxy_url) {
391 uclient_set_url(cl, proxy_url, NULL);
392 uclient_set_proxy_url(cl, *urls, auth_str);
393 } else {
394 uclient_set_url(cl, *urls, auth_str);
395 }
396 n_urls--;
397 cur_resume = resume;
398 error_ret = init_request(cl);
399 if (error_ret == 0)
400 return;
401 }
402
403 if (output_fd >= 0 && !opt_output_file) {
404 close(output_fd);
405 output_fd = -1;
406 }
407 uclient_disconnect(cl);
408 uloop_end();
409 }
410
411
412 static void eof_cb(struct uclient *cl)
413 {
414 if (!quiet) {
415 pmt_update(&pmt_timer);
416 uloop_timeout_cancel(&pmt_timer);
417 fprintf(stderr, "\n");
418 }
419
420 if (!cl->data_eof) {
421 if (!quiet)
422 fprintf(stderr, "Connection reset prematurely\n");
423 error_ret = 4;
424 } else if (!quiet) {
425 fprintf(stderr, "Download completed (%"PRIu64" bytes)\n", (uint64_t) out_bytes);
426 }
427 request_done(cl);
428 }
429
430 static void handle_uclient_error(struct uclient *cl, int code)
431 {
432 const char *type = "Unknown error";
433 bool ignore = false;
434
435 switch(code) {
436 case UCLIENT_ERROR_CONNECT:
437 type = "Connection failed";
438 error_ret = 4;
439 break;
440 case UCLIENT_ERROR_TIMEDOUT:
441 type = "Connection timed out";
442 error_ret = 4;
443 break;
444 case UCLIENT_ERROR_SSL_INVALID_CERT:
445 type = "Invalid SSL certificate";
446 ignore = !verify;
447 error_ret = 5;
448 break;
449 case UCLIENT_ERROR_SSL_CN_MISMATCH:
450 type = "Server hostname does not match SSL certificate";
451 ignore = !verify;
452 error_ret = 5;
453 break;
454 default:
455 error_ret = 1;
456 break;
457 }
458
459 if (!quiet)
460 fprintf(stderr, "Connection error: %s%s\n", type, ignore ? " (ignored)" : "");
461
462 if (ignore)
463 error_ret = 0;
464 else
465 request_done(cl);
466 }
467
/* Callback table handed to uclient_new() in main(). */
static const struct uclient_cb cb = {
	.header_done = header_done_cb,
	.data_read = read_data_cb,
	.data_eof = eof_cb,
	.error = handle_uclient_error,
};
474
/*
 * Print the command line help to stderr.
 *
 * progname: program name shown in the "Usage:" line
 *
 * Always returns 1 so callers can write "return usage(progname);".
 */
static int usage(const char *progname)
{
	fprintf(stderr,
		"Usage: %s [options] <URL>\n"
		"Options:\n"
		" -4 Use IPv4 only\n"
		" -6 Use IPv6 only\n"
		" -O <file> Redirect output to file (use \"-\" for stdout)\n"
		" -P <dir> Set directory for output files\n"
		" --quiet | -q Turn off status messages\n"
		" --continue | -c Continue a partially-downloaded file\n"
		" --user=<user> HTTP authentication username\n"
		" --password=<password> HTTP authentication password\n"
		" --user-agent | -U <str> Set HTTP user agent\n"
		" --post-data=STRING use the POST method; send STRING as the data\n"
		" --post-file=FILE use the POST method; send FILE as the data\n"
		" --spider | -s Spider mode - only check file existence\n"
		" --timeout=N | -T N Set connect/request timeout to N seconds\n"
		" --proxy=on | -Y on Enable interpretation of proxy env vars (default)\n"
		" --proxy=off | -Y off |\n"
		" --no-proxy Disable interpretation of proxy env vars\n"
		"\n"
		"HTTPS options:\n"
		" --ca-certificate=<cert> Load CA certificates from file <cert>\n"
		" --no-check-certificate don't validate the server's certificate\n"
		" --ciphers=<cipherlist> Set the cipher list string\n"
		"\n", progname);
	return 1;
}
504
505 static void init_ca_cert(void)
506 {
507 glob_t gl;
508 unsigned int i;
509
510 glob("/etc/ssl/certs/*.crt", 0, NULL, &gl);
511 for (i = 0; i < gl.gl_pathc; i++)
512 ssl_ops->context_add_ca_crt_file(ssl_ctx, gl.gl_pathv[i]);
513 globfree(&gl);
514 }
515
516 static void init_ustream_ssl(void)
517 {
518 void *dlh;
519
520 dlh = dlopen("libustream-ssl." LIB_EXT, RTLD_LAZY | RTLD_LOCAL);
521 if (!dlh)
522 return;
523
524 ssl_ops = dlsym(dlh, "ustream_ssl_ops");
525 if (!ssl_ops)
526 return;
527
528 ssl_ctx = ssl_ops->context_new(false);
529 }
530
/*
 * Tell the user on stderr that SSL support is missing and which
 * packages provide it.
 *
 * progname: program name used as the message prefix
 *
 * Always returns 1, suitable as the process exit status.
 */
static int no_ssl(const char *progname)
{
	static const char hint[] =
		"SSL support not available, please install one of the "
		"libustream-.*[ssl|tls] packages as well as the ca-bundle and "
		"ca-certificates packages.";

	fprintf(stderr, "%s: %s\n", progname, hint);

	return 1;
}
541
/*
 * Indices of the long-only options. Each value is the position of the
 * matching entry in longopts[] and is what getopt_long() stores in
 * longopt_idx when it returns 0.
 */
enum {
	L_NO_CHECK_CERTIFICATE,
	L_CA_CERTIFICATE,
	L_CIPHERS,
	L_USER,
	L_PASSWORD,
	L_USER_AGENT,
	L_POST_DATA,
	L_POST_FILE,
	L_SPIDER,
	L_TIMEOUT,
	L_CONTINUE,
	L_PROXY,
	L_NO_PROXY,
	L_QUIET,
};
558
/*
 * Long option table for getopt_long(). Every entry has flag == NULL and
 * val == 0, so getopt_long() returns 0 for all of them and main()
 * dispatches on the table index instead. The empty entry terminates the
 * table.
 */
static const struct option longopts[] = {
	[L_NO_CHECK_CERTIFICATE] = { "no-check-certificate", no_argument, NULL, 0 },
	[L_CA_CERTIFICATE] = { "ca-certificate", required_argument, NULL, 0 },
	[L_CIPHERS] = { "ciphers", required_argument, NULL, 0 },
	[L_USER] = { "user", required_argument, NULL, 0 },
	[L_PASSWORD] = { "password", required_argument, NULL, 0 },
	[L_USER_AGENT] = { "user-agent", required_argument, NULL, 0 },
	[L_POST_DATA] = { "post-data", required_argument, NULL, 0 },
	[L_POST_FILE] = { "post-file", required_argument, NULL, 0 },
	[L_SPIDER] = { "spider", no_argument, NULL, 0 },
	[L_TIMEOUT] = { "timeout", required_argument, NULL, 0 },
	[L_CONTINUE] = { "continue", no_argument, NULL, 0 },
	[L_PROXY] = { "proxy", required_argument, NULL, 0 },
	[L_NO_PROXY] = { "no-proxy", no_argument, NULL, 0 },
	[L_QUIET] = { "quiet", no_argument, NULL, 0 },
	{}
};
576
577
578
/*
 * Program entry point.
 *
 * Parses the command line, creates the uclient instance for the first
 * URL (through a proxy when one is configured for its scheme), starts
 * the first request and runs the event loop until request_done() stops
 * it. Further URLs on the command line are queued in urls / n_urls.
 *
 * Returns error_ret, or 1 for usage errors and missing SSL support.
 */
int main(int argc, char **argv)
{
	const char *progname = argv[0];
	const char *proxy_url;
	char *username = NULL;
	char *password = NULL;
	struct uclient *cl;
	int longopt_idx = 0;
	bool has_cert = false;
	int i, ch;
	int rc;
	int af = -1;

	/* Ignore SIGPIPE so a closed peer or pipe shows up as a write error instead. */
	signal(SIGPIPE, SIG_IGN);
	/* Must run before option parsing: the SSL options below act on ssl_ctx. */
	init_ustream_ssl();

	while ((ch = getopt_long(argc, argv, "46cO:P:qsT:U:Y:", longopts, &longopt_idx)) != -1) {
		switch(ch) {
		case 0:
			/* Long-only option: longopt_idx is its index in longopts[]. */
			switch (longopt_idx) {
			case L_NO_CHECK_CERTIFICATE:
				verify = false;
				if (ssl_ctx)
					ssl_ops->context_set_require_validation(ssl_ctx, verify);
				break;
			case L_CA_CERTIFICATE:
				has_cert = true;
				if (ssl_ctx)
					ssl_ops->context_add_ca_crt_file(ssl_ctx, optarg);
				break;
			case L_CIPHERS:
				if (ssl_ctx) {
					if (ssl_ops->context_set_ciphers(ssl_ctx, optarg)) {
						if (!quiet)
							fprintf(stderr, "No recognized ciphers in cipher list\n");
						exit(1);
					}
				}
				break;
			case L_USER:
				if (!strlen(optarg))
					break;
				username = strdupa(optarg);
				/* Overwrite the argument in argv; presumably to hide it from process listings. */
				memset(optarg, '*', strlen(optarg));
				break;
			case L_PASSWORD:
				if (!strlen(optarg))
					break;
				password = strdupa(optarg);
				/* Same masking as for the user name above. */
				memset(optarg, '*', strlen(optarg));
				break;
			case L_USER_AGENT:
				user_agent = optarg;
				break;
			case L_POST_DATA:
				post_data = optarg;
				break;
			case L_POST_FILE:
				post_file = optarg;
				break;
			case L_SPIDER:
				no_output = true;
				break;
			case L_TIMEOUT:
				timeout = atoi(optarg);
				break;
			case L_CONTINUE:
				resume = true;
				break;
			case L_PROXY:
				/* Anything other than "on" disables proxy handling. */
				if (strcmp(optarg, "on") != 0)
					proxy = false;
				break;
			case L_NO_PROXY:
				proxy = false;
				break;
			case L_QUIET:
				quiet = true;
				break;
			default:
				return usage(progname);
			}
			break;
		case '4':
			af = AF_INET;
			break;
		case '6':
			af = AF_INET6;
			break;
		case 'c':
			resume = true;
			break;
		case 'U':
			user_agent = optarg;
			break;
		case 'O':
			opt_output_file = optarg;
			break;
		case 'P':
			/* Output files are created relative to the current directory. */
			if (chdir(optarg)) {
				if (!quiet)
					perror("Change output directory");
				exit(1);
			}
			break;
		case 'q':
			quiet = true;
			break;
		case 's':
			no_output = true;
			break;
		case 'T':
			timeout = atoi(optarg);
			break;
		case 'Y':
			/* Anything other than "on" disables proxy handling. */
			if (strcmp(optarg, "on") != 0)
				proxy = false;
			break;
		default:
			return usage(progname);
		}
	}

	/* From here on argv[0] is the first URL, not the program name. */
	argv += optind;
	argc -= optind;

	/* Verification without an explicit CA file uses the certificates loaded by init_ca_cert(). */
	if (verify && !has_cert)
		default_certs = true;

	if (argc < 1)
		return usage(progname);

	/* Without an SSL backend, refuse up front if any URL starts with "https". */
	if (!ssl_ctx) {
		for (i = 0; i < argc; i++) {
			if (!strncmp(argv[i], "https", 5))
				return no_ssl(progname);
		}
	}

	/* Queue everything after the first URL for request_done(). */
	urls = argv + 1;
	n_urls = argc - 1;

	uloop_init();

	if (username) {
		if (password) {
			rc = asprintf(&auth_str, "%s:%s", username, password);
			if (rc < 0)
				return rc;
		} else
			auth_str = username;
	}

	if (!quiet)
		fprintf(stderr, "Downloading '%s'\n", argv[0]);

	/* With a proxy, the client is created for the proxy and argv[0] is set as the proxied URL. */
	proxy_url = get_proxy_url(argv[0]);
	if (proxy_url) {
		cl = uclient_new(proxy_url, auth_str, &cb);
		if (cl)
			uclient_set_proxy_url(cl, argv[0], NULL);
	} else {
		cl = uclient_new(argv[0], auth_str, &cb);
	}
	if (!cl) {
		fprintf(stderr, "Failed to allocate uclient context\n");
		return 1;
	}
	/* af stays -1 unless -4 or -6 was given. */
	if (af >= 0)
		uclient_http_set_address_family(cl, af);

	if (ssl_ctx && default_certs)
		init_ca_cert();

	cur_resume = resume;
	rc = init_request(cl);
	if (!rc) {
		/* no error received, we can enter main loop */
		uloop_run();
	} else {
		fprintf(stderr, "Failed to send request: %s\n", strerror(rc));
		error_ret = 4;
	}

	uloop_done();

	uclient_free(cl);

	/* stdout is not ours to close. */
	if (output_fd >= 0 && output_fd != STDOUT_FILENO)
		close(output_fd);

	if (ssl_ctx)
		ssl_ops->context_free(ssl_ctx);

	return error_ret;
}