diff options
| author | Dag-Erling Smørgrav <des@FreeBSD.org> | 2000-07-12 10:39:56 +0000 |
|---|---|---|
| committer | Dag-Erling Smørgrav <des@FreeBSD.org> | 2000-07-12 10:39:56 +0000 |
| commit | e4878e39a30e7b26cd3ae1937a6d08a263d32443 (patch) | |
| tree | a5708bdb0b774fdea79d08322379566ead8ffce1 | |
| parent | d70f8cefa078ff831b7875d8ee2d997668b19273 (diff) | |
Notes
| -rw-r--r-- | lib/libfetch/http.c | 1131 |
1 files changed, 685 insertions, 446 deletions
diff --git a/lib/libfetch/http.c b/lib/libfetch/http.c index d8ae7444acb7..dfe1314f4676 100644 --- a/lib/libfetch/http.c +++ b/lib/libfetch/http.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav + * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -12,7 +12,7 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission + * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES @@ -25,46 +25,15 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $FreeBSD$ + * $FreeBSD$ */ -/* - * The base64 code in this file is based on code from MIT fetch, which - * has the following copyright and license: - * - *- - * Copyright 1997 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ - #include <sys/param.h> #include <sys/socket.h> -#include <err.h> #include <ctype.h> +#include <err.h> +#include <errno.h> #include <locale.h> #include <netdb.h> #include <stdarg.h> @@ -78,148 +47,402 @@ #include "common.h" #include "httperr.h" -extern char *__progname; +extern char *__progname; /* XXX not portable */ -#define ENDL "\r\n" +/* Maximum number of redirects to follow */ +#define MAX_REDIRECT 5 -#define HTTP_OK 200 -#define HTTP_PARTIAL 206 -#define HTTP_MOVED 302 +/* Symbolic names for reply codes we care about */ +#define HTTP_OK 200 +#define HTTP_PARTIAL 206 +#define HTTP_MOVED_PERM 301 +#define HTTP_MOVED_TEMP 302 +#define HTTP_SEE_OTHER 303 +#define HTTP_NEED_AUTH 401 +#define HTTP_NEED_PROXY_AUTH 403 +#define HTTP_PROTOCOL_ERROR 999 + +#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ + || (xyz) == HTTP_MOVED_TEMP \ + || (xyz) == HTTP_SEE_OTHER) + + + +/***************************************************************************** + * I/O functions for decoding chunked streams + */ struct cookie { - FILE *real_f; -#define ENC_NONE 0 -#define ENC_CHUNKED 1 - int encoding; /* 1 = chunked, 0 = none */ -#define HTTPCTYPELEN 59 - char content_type[HTTPCTYPELEN+1]; - char *buf; - int b_cur, eof; - unsigned b_len, chunksize; + int fd; + char *buf; + size_t b_size; + size_t b_len; + int b_pos; + int eof; + int error; + long chunksize; +#ifdef DEBUG + long total; +#endif }; /* - * Send a formatted line; optionally echo to terminal + * Get next chunk header */ static int -_http_cmd(FILE *f, char *fmt, ...) +_http_new_chunk(struct cookie *c) { - va_list ap; - - va_start(ap, fmt); - vfprintf(f, fmt, ap); - DEBUG(fprintf(stderr, "\033[1m>>> ")); - DEBUG(vfprintf(stderr, fmt, ap)); - DEBUG(fprintf(stderr, "\033[m")); - va_end(ap); + char *p; + + if (_fetch_getln(c->fd, &c->buf, &c->b_size, &c->b_len) == -1) + return -1; + + if (c->b_len < 2 || !ishexnumber(*c->buf)) + return -1; + + for (p = c->buf; !isspace(*p) && *p != ';' && p < c->buf + c->b_len; ++p) + if (!ishexnumber(*p)) + return -1; + else if (isdigit(*p)) + c->chunksize = c->chunksize * 16 + *p - '0'; + else + c->chunksize = c->chunksize * 16 + 10 + tolower(*p) - 'a'; + +#ifdef DEBUG + c->total += c->chunksize; + if (c->chunksize == 0) + fprintf(stderr, "\033[1m_http_fillbuf(): " + "end of last chunk\033[m\n"); + else + fprintf(stderr, "\033[1m_http_fillbuf(): " + "new chunk: %ld (%ld)\033[m\n", c->chunksize, c->total); +#endif - return 0; /* XXX */ + return c->chunksize; } /* * Fill the input buffer, do chunk decoding on the fly */ -static char * +static int _http_fillbuf(struct cookie *c) { - char *ln; - unsigned int len; - + if (c->error) + return -1; if (c->eof) - return NULL; - - if (c->encoding == ENC_NONE) { - c->buf = fgetln(c->real_f, &(c->b_len)); - c->b_cur = 0; - } else if (c->encoding == ENC_CHUNKED) { - if (c->chunksize == 0) { - ln = fgetln(c->real_f, &len); - if (len <= 2) - return NULL; - DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " - "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); - sscanf(ln, "%x", &(c->chunksize)); - if (!c->chunksize) { - DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " - "end of last chunk\033[m\n")); - c->eof = 1; - return NULL; - } - DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " - "new chunk: %X\033[m\n", c->chunksize)); + return 0; + + if (c->chunksize == 0) { + switch (_http_new_chunk(c)) { + case -1: + c->error = 1; + return -1; + case 0: + c->eof = 1; + return 0; } - c->buf = fgetln(c->real_f, &(c->b_len)); - if (c->b_len > c->chunksize) - c->b_len = c->chunksize; - c->chunksize -= c->b_len; - c->b_cur = 0; } - else return NULL; /* unknown encoding */ - return c->buf; + + if (c->b_size < c->chunksize) { + char *tmp; + + if ((tmp = realloc(c->buf, c->chunksize)) == NULL) + return -1; + c->buf = tmp; + c->b_size = c->chunksize; + } + + if ((c->b_len = read(c->fd, c->buf, c->chunksize)) == -1) + return -1; + c->chunksize -= c->b_len; + + if (c->chunksize == 0) { + char endl[2]; + read(c->fd, endl, 2); + } + + c->b_pos = 0; + + return c->b_len; } /* * Read function */ static int -_http_readfn(struct cookie *c, char *buf, int len) +_http_readfn(void *v, char *buf, int len) { - int l, pos = 0; - while (len) { + struct cookie *c = (struct cookie *)v; + int l, pos; + + if (c->error) + return -1; + if (c->eof) + return 0; + + for (pos = 0; len > 0; pos += l, len -= l) { /* empty buffer */ - if (!c->buf || (c->b_cur == c->b_len)) - if (!_http_fillbuf(c)) + if (!c->buf || c->b_pos == c->b_len) + if (_http_fillbuf(c) < 1) break; - - l = c->b_len - c->b_cur; - if (len < l) l = len; - memcpy(buf + pos, c->buf + c->b_cur, l); - c->b_cur += l; - pos += l; - len -= l; + l = c->b_len - c->b_pos; + if (len < l) + l = len; + bcopy(c->buf + c->b_pos, buf + pos, l); + c->b_pos += l; } - - if (ferror(c->real_f)) + + if (!pos && c->error) return -1; - else return pos; + return pos; } /* * Write function */ static int -_http_writefn(struct cookie *c, const char *buf, int len) +_http_writefn(void *v, const char *buf, int len) { - size_t r = fwrite(buf, 1, (size_t)len, c->real_f); - return r ? r : -1; + struct cookie *c = (struct cookie *)v; + + return write(c->fd, buf, len); } /* * Close function */ static int -_http_closefn(struct cookie *c) +_http_closefn(void *v) { - int r = fclose(c->real_f); + struct cookie *c = (struct cookie *)v; + int r; + + r = close(c->fd); + if (c->buf) + free(c->buf); free(c); - return (r == EOF) ? -1 : 0; + return r; +} + +/* + * Wrap a file descriptor up + */ +static FILE * +_http_funopen(int fd) +{ + struct cookie *c; + FILE *f; + + if ((c = calloc(1, sizeof *c)) == NULL) { + _fetch_syserr(); + return NULL; + } + c->fd = fd; + if (!(f = funopen(c, _http_readfn, _http_writefn, NULL, _http_closefn))) { + _fetch_syserr(); + free(c); + return NULL; + } + return f; +} + + +/***************************************************************************** + * Helper functions for talking to the server and parsing its replies + */ + +/* Header types */ +typedef enum { + hdr_syserror = -2, + hdr_error = -1, + hdr_end = 0, + hdr_unknown = 1, + hdr_content_length, + hdr_content_range, + hdr_last_modified, + hdr_location, + hdr_transfer_encoding +} hdr; + +/* Names of interesting headers */ +static struct { + hdr num; + char *name; +} hdr_names[] = { + { hdr_content_length, "Content-Length" }, + { hdr_content_range, "Content-Range" }, + { hdr_last_modified, "Last-Modified" }, + { hdr_location, "Location" }, + { hdr_transfer_encoding, "Transfer-Encoding" }, + { hdr_unknown, NULL }, +}; + +static char *reply_buf; +static size_t reply_size; +static size_t reply_length; + +/* + * Send a formatted line; optionally echo to terminal + */ +static int +_http_cmd(int fd, char *fmt, ...) +{ + va_list ap; + size_t len; + char *msg; + int r; + + va_start(ap, fmt); + len = vasprintf(&msg, fmt, ap); + va_end(ap); + + if (msg == NULL) { + errno = ENOMEM; + _fetch_syserr(); + return -1; + } + + r = _fetch_putln(fd, msg, len); + free(msg); + + if (r == -1) { + _fetch_syserr(); + return -1; + } + + return 0; +} + +/* + * Get and parse status line + */ +static int +_http_get_reply(int fd) +{ + if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1) + return -1; + /* + * A valid status line looks like "HTTP/m.n xyz reason" where m + * and n are the major and minor protocol version numbers and xyz + * is the reply code. + * We grok HTTP 1.0 and 1.1, so m must be 1 and n must be 0 or 1. + * We don't care about the reason phrase. + */ + if (strncmp(reply_buf, "HTTP/1.", 7) != 0 + || (reply_buf[7] != '0' && reply_buf[7] != '1') || reply_buf[8] != ' ' + || !isdigit(reply_buf[9]) + || !isdigit(reply_buf[10]) + || !isdigit(reply_buf[11])) + return HTTP_PROTOCOL_ERROR; + + return ((reply_buf[9] - '0') * 100 + + (reply_buf[10] - '0') * 10 + + (reply_buf[11] - '0')); } /* - * Extract content type from cookie + * Check a header; if the type matches the given string, return a + * pointer to the beginning of the value. */ -char * -fetchContentType(FILE *f) +static char * +_http_match(char *str, char *hdr) { + while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) + /* nothing */; + if (*str || *hdr != ':') + return NULL; + while (*hdr && isspace(*++hdr)) + /* nothing */; + return hdr; +} + +/* + * Get the next header and return the appropriate symbolic code. + */ +static hdr +_http_next_header(int fd, char **p) +{ + int i; + + if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1) + return hdr_syserror; + while (reply_length && isspace(reply_buf[reply_length-1])) + reply_length--; + reply_buf[reply_length] = 0; + if (reply_length == 0) + return hdr_end; /* - * We have no way of making sure this really *is* one of our cookies, - * so just check for a null pointer and hope for the best. + * We could check for malformed headers but we don't really care. + * A valid header starts with a token immediately followed by a + * colon; a token is any sequence of non-control, non-whitespace + * characters except "()<>@,;:\\\"{}". */ - return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; + for (i = 0; hdr_names[i].num != hdr_unknown; i++) + if ((*p = _http_match(hdr_names[i].name, reply_buf)) != NULL) + return hdr_names[i].num; + return hdr_unknown; +} + +/* + * Parse a last-modified header + */ +static time_t +_http_parse_mtime(char *p) +{ + char locale[64]; + struct tm tm; + + strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); + setlocale(LC_TIME, "C"); + strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); + /* XXX should add support for date-2 and date-3 */ + setlocale(LC_TIME, locale); + DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " + "%02d:%02d:%02d\033[m]\n", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec)); + return timegm(&tm); } /* + * Parse a content-length header + */ +static off_t +_http_parse_length(char *p) +{ + off_t len; + + for (len = 0; *p && isdigit(*p); ++p) + len = len * 10 + (*p - '0'); + DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", len)); + return len; +} + +/* + * Parse a content-range header + */ +static off_t +_http_parse_range(char *p) +{ + off_t off; + + if (strncasecmp(p, "bytes ", 6) != 0) + return -1; + for (p += 6, off = 0; *p && isdigit(*p); ++p) + off = off * 10 + *p - '0'; + if (*p != '-') + return -1; + DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", off)); + return off; +} + + +/***************************************************************************** + * Helper functions for authorization + */ + +/* * Base64 encoding */ static char * @@ -279,7 +502,7 @@ _http_base64(char *src) * Encode username and password */ static int -_http_basic_auth(FILE *f, char *hdr, char *usr, char *pwd) +_http_basic_auth(int fd, char *hdr, char *usr, char *pwd) { char *upw, *auth; int r; @@ -290,7 +513,7 @@ _http_basic_auth(FILE *f, char *hdr, char *usr, char *pwd) free(upw); if (auth == NULL) return -1; - r = _http_cmd(f, "%s: Basic %s" ENDL, hdr, auth); + r = _http_cmd(fd, "%s: Basic %s", hdr, auth); free(auth); return r; } @@ -299,7 +522,7 @@ _http_basic_auth(FILE *f, char *hdr, char *usr, char *pwd) * Send an authorization header */ static int -_http_authorize(FILE *f, char *hdr, char *p) +_http_authorize(int fd, char *hdr, char *p) { /* basic authorization */ if (strncasecmp(p, "basic:", 6) == 0) { @@ -316,40 +539,138 @@ _http_authorize(FILE *f, char *hdr, char *p) user = str; pwd = strchr(str, ':'); *pwd++ = '\0'; - r = _http_basic_auth(f, hdr, user, pwd); + r = _http_basic_auth(fd, hdr, user, pwd); free(str); return r; } return -1; } + +/***************************************************************************** + * Helper functions for connecting to a server or proxy + */ + /* - * Connect to server or proxy + * Connect to the specified HTTP proxy server. */ -static FILE * -_http_connect(struct url *URL, char *flags, int *proxy) +static int +_http_proxy_connect(char *proxy, int af, int verbose) { - int direct, sd = -1, verbose; + char *hostname, *p; + int fd, port; + + /* get hostname */ + hostname = NULL; #ifdef INET6 - int af = AF_UNSPEC; -#else - int af = AF_INET; + /* host part can be an IPv6 address enclosed in square brackets */ + if (*proxy == '[') { + if ((p = strchr(proxy, ']')) == NULL) { + /* no terminating bracket */ + /* XXX should set an error code */ + goto ouch; + } + if (p[1] != '\0' && p[1] != ':') { + /* garbage after address */ + /* XXX should set an error code */ + goto ouch; + } + if ((hostname = malloc(p - proxy)) == NULL) { + errno = ENOMEM; + _fetch_syserr(); + goto ouch; + } + strncpy(hostname, proxy + 1, p - proxy - 1); + hostname[p - proxy - 1] = '\0'; + ++p; + } else { +#endif /* INET6 */ + if ((p = strchr(proxy, ':')) == NULL) + p = strchr(proxy, '\0'); + if ((hostname = malloc(p - proxy + 1)) == NULL) { + errno = ENOMEM; + _fetch_syserr(); + goto ouch; + } + strncpy(hostname, proxy, p - proxy); + hostname[p - proxy] = '\0'; +#ifdef INET6 + } +#endif /* INET6 */ + DEBUG(fprintf(stderr, "proxy name: [%s]\n", hostname)); + + /* get port number */ + port = 0; + if (*p == ':') { + ++p; + if (strspn(p, "0123456789") != strlen(p) || strlen(p) > 5) { + /* port number is non-numeric or too long */ + /* XXX should set an error code */ + goto ouch; + } + port = atoi(p); + if (port < 1 || port > 65535) { + /* port number is out of range */ + /* XXX should set an error code */ + goto ouch; + } + } + + if (!port) { +#if 0 + /* + * commented out, since there is currently no service name + * for HTTP proxies + */ + struct servent *se; + + if ((se = getservbyname("xxxx", "tcp")) != NULL) + port = ntohs(se->s_port); + else #endif - size_t len; - char *px; - FILE *f; + port = 3128; + } + DEBUG(fprintf(stderr, "proxy port: %d\n", port)); + + /* connect */ + if ((fd = _fetch_connect(hostname, port, af, verbose)) == -1) + _fetch_syserr(); + return fd; + ouch: + if (hostname) + free(hostname); + return -1; +} + +/* + * Connect to the correct HTTP server or proxy. + */ +static int +_http_connect(struct url *URL, int *proxy, char *flags) +{ + int direct, verbose; + int af, fd; + char *p; + +#ifdef INET6 + af = AF_UNSPEC; +#else + af = AF_INET; +#endif + direct = (flags && strchr(flags, 'd')); verbose = (flags && strchr(flags, 'v')); - if ((flags && strchr(flags, '4'))) + if (flags && strchr(flags, '4')) af = AF_INET; - else if ((flags && strchr(flags, '6'))) + else if (flags && strchr(flags, '6')) af = AF_INET6; /* check port */ if (!URL->port) { struct servent *se; + /* Scheme can be ftp if we're using a proxy */ if (strcasecmp(URL->scheme, "ftp") == 0) if ((se = getservbyname("ftp", "tcp")) != NULL) URL->port = ntohs(se->s_port); @@ -362,166 +683,254 @@ _http_connect(struct url *URL, char *flags, int *proxy) URL->port = 80; } - /* attempt to connect to proxy server */ - if (!direct && (px = getenv("HTTP_PROXY")) != NULL) { - char host[MAXHOSTNAMELEN]; - int port = 0; + if (!direct && (p = getenv("HTTP_PROXY")) != NULL) { + /* attempt to connect to proxy server */ + if ((fd = _http_proxy_connect(p, af, verbose)) == -1) + return -1; + *proxy = 1; + } else { + /* if no proxy is configured, try direct */ + if (strcasecmp(URL->scheme, "ftp") == 0) { + /* can't talk http to an ftp server */ + /* XXX should set an error code */ + return -1; + } + if ((fd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1) + /* _fetch_connect() has already set an error code */ + return -1; + *proxy = 0; + } - /* measure length */ + return fd; +} + + +/***************************************************************************** + * Core + */ + +/* + * Send a request and process the reply + */ +static FILE * +_http_request(struct url *URL, char *op, struct url_stat *us, char *flags) +{ + struct url *url, *new; + int chunked, need_auth, noredirect, proxy, verbose; + int code, fd, i, n; + off_t offset; + char *p; + FILE *f; + hdr h; + char *host; #ifdef INET6 - if (px[0] != '[' || - (len = strcspn(px, "]")) >= strlen(px) || - (px[++len] != '\0' && px[len] != ':')) + char hbuf[MAXHOSTNAMELEN + 1]; #endif - len = strcspn(px, ":"); - /* get port (XXX atoi is a little too tolerant perhaps?) */ - if (px[len] == ':') { - if (strspn(px+len+1, "0123456789") != strlen(px+len+1) - || strlen(px+len+1) > 5) { - /* XXX we should emit some kind of warning */ - } - port = atoi(px+len+1); - if (port < 1 || port > 65535) { - /* XXX we should emit some kind of warning */ + noredirect = (flags && strchr(flags, 'A')); + verbose = (flags && strchr(flags, 'v')); + + n = noredirect ? 1 : MAX_REDIRECT; + + us->size = -1; + us->atime = us->mtime = 0; + chunked = 0; + offset = 0; + fd = -1; + code = HTTP_PROTOCOL_ERROR; /* just to appease a compiler warning */ + + for (url = URL, i = 0; i < n; ++i) { + need_auth = 0; + retry: + /* connect to server or proxy */ + if ((fd = _http_connect(url, &proxy, flags)) == -1) + goto ouch; + + host = url->host; +#ifdef INET6 + if (strchr(url->host, ':')) { + snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); + host = hbuf; + } +#endif + + /* send request */ + if (verbose) + _fetch_info("requesting %s://%s:%d%s", + url->scheme, host, url->port, url->doc); + if (proxy) { + _http_cmd(fd, "%s %s://%s:%d%s HTTP/1.1", + op, url->scheme, host, url->port, url->doc); + } else { + _http_cmd(fd, "%s %s HTTP/1.1", + op, url->doc); + } + + /* proxy authorization */ + if (proxy && (p = getenv("HTTP_PROXY_AUTH")) != NULL) + _http_authorize(fd, "Proxy-Authorization", p); + + /* server authorization */ + if (need_auth) { + if (*url->user || *url->pwd) + _http_basic_auth(fd, "Authorization", + url->user ? url->user : "", + url->pwd ? url->pwd : ""); + else if ((p = getenv("HTTP_AUTH")) != NULL) + _http_authorize(fd, "Authorization", p); + else { + _http_seterr(HTTP_NEED_AUTH); + goto ouch; } } - if (!port) { -#if 0 + + /* other headers */ + _http_cmd(fd, "Host: %s:%d", host, url->port); + _http_cmd(fd, "User-Agent: %s " _LIBFETCH_VER, __progname); + if (URL->offset) + _http_cmd(fd, "Range: bytes=%lld-", url->offset); + _http_cmd(fd, "Connection: close"); + _http_cmd(fd, ""); + + /* get reply */ + switch ((code = _http_get_reply(fd))) { + case HTTP_OK: + case HTTP_PARTIAL: + /* fine */ + break; + case HTTP_MOVED_PERM: + case HTTP_MOVED_TEMP: /* - * commented out, since there is currently no service name - * for HTTP proxies + * Not so fine, but we still have to read the headers to + * get the new location. */ - struct servent *se; - - if ((se = getservbyname("xxxx", "tcp")) != NULL) - port = ntohs(se->s_port); - else -#endif - port = 3128; + break; + case HTTP_NEED_AUTH: + if (need_auth) { + /* + * We already sent out authorization code, so there's + * nothing more we can do. + */ + _http_seterr(code); + goto ouch; + } + /* try again, but send the password this time */ + if (verbose) + _fetch_info("server requires authorization"); + need_auth = 1; + close(fd); + goto retry; + case HTTP_NEED_PROXY_AUTH: + /* + * If we're talking to a proxy, we already sent our proxy + * authorization code, so there's nothing more we can do. + */ + _http_seterr(code); + goto ouch; + case HTTP_PROTOCOL_ERROR: + /* fall through */ + case -1: + _fetch_syserr(); + goto ouch; + default: + _http_seterr(code); + goto ouch; } - /* get host name */ -#ifdef INET6 - if (len > 1 && px[0] == '[' && px[len - 1] == ']') { - px++; - len -= 2; - } -#endif - if (len >= MAXHOSTNAMELEN) - len = MAXHOSTNAMELEN - 1; - strncpy(host, px, len); - host[len] = 0; + /* get headers */ + do { + switch ((h = _http_next_header(fd, &p))) { + case hdr_syserror: + _fetch_syserr(); + goto ouch; + case hdr_error: + _http_seterr(HTTP_PROTOCOL_ERROR); + goto ouch; + case hdr_content_length: + us->size = _http_parse_length(p); + break; + case hdr_content_range: + offset = _http_parse_range(p); + break; + case hdr_last_modified: + us->atime = us->mtime = _http_parse_mtime(p); + break; + case hdr_location: + if (!HTTP_REDIRECT(code)) + break; + if (verbose) + _fetch_info("%d redirect to %s", code, p); + if ((new = fetchParseURL(p)) == NULL) + /* invalid location */ + goto ouch; + if (!*new->user && !*new->pwd) { + strcpy(new->user, url->user); + strcpy(new->pwd, url->pwd); + } + new->offset = url->offset; + new->length = url->length; + close(fd); + us->size = -1; + us->atime = us->mtime = 0; + chunked = 0; + offset = 0; + fd = -1; + if (url != URL) + fetchFreeURL(url); + url = new; + continue; + case hdr_transfer_encoding: + /* XXX weak test*/ + chunked = (strcasecmp(p, "chunked") == 0); + break; + case hdr_end: + /* fall through */ + case hdr_unknown: + /* ignore */ + break; + } + } while (h > hdr_end); - /* connect */ - sd = _fetch_connect(host, port, af, verbose); + if (code == HTTP_OK || code == HTTP_PARTIAL) + break; } - /* if no proxy is configured or could be contacted, try direct */ - *proxy = (sd != -1); - if (!*proxy) { - if (strcasecmp(URL->scheme, "ftp") == 0) - goto ouch; - if ((sd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1) - goto ouch; + /* no success */ + if (fd == -1) { + _http_seterr(code); + goto ouch; } - /* reopen as stream */ - if ((f = fdopen(sd, "r+")) == NULL) + /* wrap it up in a FILE */ + if ((f = chunked ? _http_funopen(fd) : fdopen(fd, "r")) == NULL) { + _fetch_syserr(); goto ouch; + } + + while (offset++ < url->offset) + if (fgetc(f) == EOF) { + _fetch_syserr(); + fclose(f); + f = NULL; + } + + if (url != URL) + fetchFreeURL(url); return f; -ouch: - if (sd >= 0) - close(sd); - _http_seterr(999); /* XXX do this properly RSN */ + ouch: + if (url != URL) + fetchFreeURL(url); + if (fd != -1) + close(fd); return NULL; } -/* - * Check a header line - */ -static char * -_http_match(char *str, char *hdr) -{ - while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) - /* nothing */; - if (*str || *hdr != ':') - return NULL; - while (*hdr && isspace(*++hdr)) - /* nothing */; - return hdr; -} - -/* - * Send a HEAD or GET request + +/***************************************************************************** + * Entry points */ -static int -_http_request(FILE *f, char *op, struct url *URL, char *flags, int proxy) -{ - int e, verbose; - char *ln, *p; - size_t len; - char *host; -#ifdef INET6 - char hbuf[MAXHOSTNAMELEN + 1]; -#endif - - verbose = (flags && strchr(flags, 'v')); - - host = URL->host; -#ifdef INET6 - if (strchr(URL->host, ':')) { - snprintf(hbuf, sizeof(hbuf), "[%s]", URL->host); - host = hbuf; - } -#endif - - /* send request (proxies require absolute form) */ - if (verbose) - _fetch_info("requesting %s://%s:%d%s", - URL->scheme, host, URL->port, URL->doc); - if (proxy) - _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL, - op, URL->scheme, host, URL->port, URL->doc); - else - _http_cmd(f, "%s %s HTTP/1.1" ENDL, op, URL->doc); - - /* start sending headers away */ - if (URL->user[0] || URL->pwd[0]) - _http_basic_auth(f, "Authorization", - URL->user ? URL->user : "", - URL->pwd ? URL->pwd : ""); - else if ((p = getenv("HTTP_AUTH")) != NULL) - _http_authorize(f, "Authorization", p); - if (proxy && (p = getenv("HTTP_PROXY_AUTH")) != NULL) - _http_authorize(f, "Proxy-Authorization", p); - _http_cmd(f, "Host: %s:%d" ENDL, host, URL->port); - _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); - if (URL->offset) - _http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset); - _http_cmd(f, "Connection: close" ENDL ENDL); - - /* get response */ - if ((ln = fgetln(f, &len)) == NULL) - return 999; - DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", - (int)len-2, (int)len-2, ln)); - - /* we can't use strchr() and friends since ln isn't NUL-terminated */ - p = ln; - while ((p < ln + len) && !isspace(*p)) - p++; - while ((p < ln + len) && !isdigit(*p)) - p++; - if (!isdigit(*p)) - return 999; - - e = atoi(p); - DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); - return e; -} /* * Retrieve a file by HTTP @@ -529,111 +938,9 @@ _http_request(FILE *f, char *op, struct url *URL, char *flags, int proxy) FILE * fetchGetHTTP(struct url *URL, char *flags) { - int e, enc = ENC_NONE, i, noredirect, proxy; - struct cookie *c; - char *ln, *p, *q; - FILE *f, *cf; - size_t len; - off_t pos = 0; - - noredirect = (flags && strchr(flags, 'A')); + struct url_stat us; - /* allocate cookie */ - if ((c = calloc(1, sizeof *c)) == NULL) - return NULL; - - /* connect */ - if ((f = _http_connect(URL, flags, &proxy)) == NULL) { - free(c); - return NULL; - } - c->real_f = f; - - e = _http_request(f, "GET", URL, flags, proxy); - if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK) - && (e != HTTP_MOVED || noredirect)) { - _http_seterr(e); - free(c); - fclose(f); - return NULL; - } - - /* browse through header */ - while (1) { - if ((ln = fgetln(f, &len)) == NULL) - goto fouch; - if ((ln[0] == '\r') || (ln[0] == '\n')) - break; - while (isspace(ln[len-1])) - --len; - ln[len] = '\0'; /* XXX */ - DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); - if ((p = _http_match("Location", ln)) != NULL) { - struct url *url; - - for (q = p; *q && !isspace(*q); q++) - /* VOID */ ; - *q = 0; - if ((url = fetchParseURL(p)) == NULL) - goto fouch; - url->offset = URL->offset; - url->length = URL->length; - DEBUG(fprintf(stderr, "location: [\033[1m%s\033[m]\n", p)); - cf = fetchGetHTTP(url, flags); - fetchFreeURL(url); - fclose(f); - return cf; - } else if ((p = _http_match("Transfer-Encoding", ln)) != NULL) { - for (q = p; *q && !isspace(*q); q++) - /* VOID */ ; - *q = 0; - if (strcasecmp(p, "chunked") == 0) - enc = ENC_CHUNKED; - DEBUG(fprintf(stderr, "transfer encoding: [\033[1m%s\033[m]\n", p)); - } else if ((p = _http_match("Content-Type", ln)) != NULL) { - for (i = 0; *p && i < HTTPCTYPELEN; p++, i++) - c->content_type[i] = *p; - do c->content_type[i--] = 0; while (isspace(c->content_type[i])); - DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n", - c->content_type)); - } else if ((p = _http_match("Content-Range", ln)) != NULL) { - if (strncasecmp(p, "bytes ", 6) != 0) - goto fouch; - p += 6; - while (*p && isdigit(*p)) - pos = pos * 10 + (*p++ - '0'); - /* XXX wouldn't hurt to be slightly more paranoid here */ - DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos)); - if (pos > URL->offset) - goto fouch; - } - } - - /* only body remains */ - c->encoding = enc; - cf = funopen(c, - (int (*)(void *, char *, int))_http_readfn, - (int (*)(void *, const char *, int))_http_writefn, - (fpos_t (*)(void *, fpos_t, int))NULL, - (int (*)(void *))_http_closefn); - if (cf == NULL) - goto fouch; - - while (pos < URL->offset) - if (fgetc(cf) == EOF) - goto cfouch; - - return cf; - -fouch: - fclose(f); - free(c); - _http_seterr(999); /* XXX do this properly RSN */ - return NULL; -cfouch: - fclose(cf); - _http_seterr(999); /* XXX do this properly RSN */ - return NULL; + return _http_request(URL, "GET", &us, flags); } FILE * @@ -649,80 +956,12 @@ fetchPutHTTP(struct url *URL, char *flags) int fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags) { - int e, noredirect, proxy; - size_t len; - char *ln, *p, *q; FILE *f; - - noredirect = (flags && strchr(flags, 'A')); - - us->size = -1; - us->atime = us->mtime = 0; - /* connect */ - if ((f = _http_connect(URL, flags, &proxy)) == NULL) - return -1; - - e = _http_request(f, "HEAD", URL, flags, proxy); - if (e != HTTP_OK && (e != HTTP_MOVED || noredirect)) { - _http_seterr(e); - fclose(f); + if ((f = _http_request(URL, "HEAD", us, flags)) == NULL) return -1; - } - - while (1) { - if ((ln = fgetln(f, &len)) == NULL) - goto fouch; - if ((ln[0] == '\r') || (ln[0] == '\n')) - break; - while (isspace(ln[len-1])) - --len; - ln[len] = '\0'; /* XXX */ - DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); - if ((p = _http_match("Location", ln)) != NULL) { - struct url *url; - - for (q = p; *q && !isspace(*q); q++) - /* VOID */ ; - *q = 0; - if ((url = fetchParseURL(p)) == NULL) - goto ouch; - url->offset = URL->offset; - url->length = URL->length; - DEBUG(fprintf(stderr, "location: [\033[1m%s\033[m]\n", p)); - e = fetchStatHTTP(url, us, flags); - fetchFreeURL(url); - fclose(f); - return e; - } else if ((p = _http_match("Last-Modified", ln)) != NULL) { - struct tm tm; - char locale[64]; - - strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); - setlocale(LC_TIME, "C"); - strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); - /* XXX should add support for date-2 and date-3 */ - setlocale(LC_TIME, locale); - us->atime = us->mtime = timegm(&tm); - DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " - "%02d:%02d:%02d\033[m]\n", - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec)); - } else if ((p = _http_match("Content-Length", ln)) != NULL) { - us->size = 0; - while (*p && isdigit(*p)) - us->size = us->size * 10 + (*p++ - '0'); - DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size)); - } - } - fclose(f); return 0; - ouch: - _http_seterr(999); /* XXX do this properly RSN */ - fouch: - fclose(f); - return -1; } /* |
