/* ----------------------------------------------------------------------- *
 *
 *   Copyright 2009-2011 Intel Corporation; author: H. Peter Anvin
 *
 *   Permission is hereby granted, free of charge, to any person
 *   obtaining a copy of this software and associated documentation
 *   files (the "Software"), to deal in the Software without
 *   restriction, including without limitation the rights to use,
 *   copy, modify, merge, publish, distribute, sublicense, and/or
 *   sell copies of the Software, and to permit persons to whom
 *   the Software is furnished to do so, subject to the following
 *   conditions:
 *
 *   The above copyright notice and this permission notice shall
 *   be included in all copies or substantial portions of the Software.
 *
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 *   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 *   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 *   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 *   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 *   OTHER DEALINGS IN THE SOFTWARE.
 *
 * ----------------------------------------------------------------------- */

/*
 * urlparse.c
 */

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "url.h"

/*
 * Classify a URL by looking at what follows its first ':'.
 * The string is only inspected, never modified.
 *
 *   "<scheme>://..."  -> URL_NORMAL
 *   "<host>::<path>"  -> URL_OLD_TFTP
 *   anything else     -> URL_SUFFIX (this includes strings with no ':')
 */
enum url_type url_type(const char *url)
{
    const char *colon = strchr(url, ':');

    if (colon) {
	if (colon[1] == ':')
	    return URL_OLD_TFTP;

	/* colon[2] is only examined once colon[1] is known to be non-NUL */
	if (colon[1] == '/' && colon[2] == '/')
	    return URL_NORMAL;
    }

    return URL_SUFFIX;
}

/*
 * Decompose a URL into its components.  This is done in-place: the
 * separators inside the url buffer are overwritten with NUL bytes and
 * the string fields of *ui point into that buffer (or at string
 * literals), so this routine does not allocate any additional storage.
 * Freeing the original buffer frees all storage used.
 *
 * ui  - receives the result; it is cleared to all-zero first, so any
 *       component that is not present in the URL is left zeroed
 * url - writable, NUL-terminated URL string; modified by this routine
 *
 * Recognized forms:
 *   scheme://[user[:passwd]@]host[:port][/path[#fragment]]  URL_NORMAL
 *   host::path                                              URL_OLD_TFTP
 *   anything else (whole string becomes the path)           URL_SUFFIX
 *
 * For URL_NORMAL the fragment is cut off the path, and the path is set
 * to "" when the URL has no '/' after the authority part.
 */
void parse_url(struct url_info *ui, char *url)
{
    char *p = url;
    char *q, *r, *s;
    int c;

    memset(ui, 0, sizeof *ui);

    q = strchr(p, ':');
    if (q && (q[1] == '/' && q[2] == '/')) {
	ui->type = URL_NORMAL;

	ui->scheme = p;
	*q = '\0';
	p = q+3;		/* Skip over "://" */

	/*
	 * Cut the path off first, so that the searches for '@' and ':'
	 * below only ever see the authority part.
	 */
	q = strchr(p, '/');
	if (q) {
	    *q = '\0';
	    ui->path = q+1;
	    q = strchr(q+1, '#');
	    if (q)
		*q = '\0';	/* Drop the fragment */
	} else {
	    ui->path = "";
	}

	r = strchr(p, '@');
	if (r) {
	    ui->user = p;
	    *r = '\0';
	    s = strchr(p, ':');
	    if (s) {
		*s = '\0';
		ui->passwd = s+1;
	    }
	    p = r+1;
	}

	ui->host = p;
	r = strchr(p, ':');
	if (r) {
	    *r++ = '\0';
	    ui->port = 0;
	    while ((c = *r++)) {
		/*
		 * Stop at the first character that is not a decimal
		 * digit.  Both bounds must be tested: characters below
		 * '0' (such as '-' or ' ') would otherwise be
		 * accumulated as negative "digits".
		 */
		if (c < '0' || c > '9')
		    break;
		ui->port = ui->port * 10 + (c - '0');
	    }
	}
    } else if (q && q[1] == ':') {
	/* Old-style TFTP syntax: host::path */
	*q = '\0';
	ui->scheme = "tftp";
	ui->host = p;
	ui->path = q+2;
	ui->type = URL_OLD_TFTP;
    } else {
	ui->path = p;
	ui->type = URL_SUFFIX;
    }
}

/*
 * Escapes unsafe characters in a URL: every byte that is <= ' ' or
 * > '~' is replaced by "%XX" (uppercase hex); all other bytes are
 * copied unchanged.
 * This does *not* escape things like query characters!
 *
 * output  - destination buffer of at least bufsize bytes
 * input   - NUL-terminated string to escape
 * bufsize - size of the output buffer, including room for the NUL
 *
 * At most bufsize-1 characters are stored, followed by a terminating
 * NUL; if bufsize is 0 nothing at all is written to output.
 *
 * Returns the number of characters in the total (untruncated) output,
 * not counting the terminating NUL.  A return value >= bufsize thus
 * means the output was truncated.
 */
size_t url_escape_unsafe(char *output, const char *input, size_t bufsize)
{
    static const char uchexchar[] = "0123456789ABCDEF";
    const char *p;
    unsigned char c;
    char *q;
    size_t n = 0;

    q = output;
    for (p = input; (c = *p); p++) {
	/* n counts every output character, whether or not it is stored */
	if (c <= ' ' || c > '~') {
	    if (++n < bufsize) *q++ = '%';
	    if (++n < bufsize) *q++ = uchexchar[c >> 4];
	    if (++n < bufsize) *q++ = uchexchar[c & 15];
	} else {
	    if (++n < bufsize) *q++ = c;
	}
    }

    /* A zero-sized buffer has no room even for the terminator */
    if (bufsize)
	*q = '\0';
    return n;
}

/*
 * Convert one hexadecimal digit character (0-9, a-f, A-F) to its
 * numeric value 0..15.  Returns -1 for any other character.
 */
static int hexdigit(char c)
{
    char lower;

    if ('0' <= c && c <= '9')
	return c - '0';

    /* Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' */
    lower = c | 0x20;
    return ('a' <= lower && lower <= 'f') ? lower - 'a' + 10 : -1;
}

/*
 * Unescapes a buffer, optionally ending at an *unescaped* terminator
 * (like ; for TFTP).  The unescaping is done in-place; the result is
 * always NUL-terminated and never longer than the input.
 *
 * Each "%XY" sequence with two valid hex digits is replaced by the byte
 * it encodes.  A '%' that is not followed by two hex digits is copied
 * through unchanged.  A terminator produced by unescaping (e.g. "%3B"
 * for ';') does not end the scan.
 *
 * If a terminator is reached, return a pointer to the first character
 * after the terminator; the unescaped text before it is NUL-terminated
 * and the rest of the buffer is left untouched.  If the end of the
 * string is reached first, return NULL.  A terminator of '\0' never
 * matches, so in that case the whole string is unescaped.
 */
char *url_unescape(char *buffer, char terminator)
{
    char *p = buffer;		/* Read position */
    char *q = buffer;		/* Write position; never ahead of p */
    unsigned char c;
    int x, y;

    /* p is advanced past each character as soon as it has been fetched */
    while ((c = *p++)) {
	/* Compare as unsigned char so that terminators >= 0x80 can match */
	if (c == (unsigned char)terminator) {
	    *q = '\0';
	    return p;		/* Already points past the terminator */
	}
	if (c == '%') {
	    x = hexdigit(p[0]);
	    if (x >= 0) {
		/* p[0] is a hex digit, hence not NUL, so p[1] is readable */
		y = hexdigit(p[1]);
		if (y >= 0) {
		    *q++ = (x << 4) + y;
		    p += 2;
		    continue;
		}
	    }
	}
	*q++ = c;
    }
    *q = '\0';
    return NULL;
}

#ifdef URL_TEST

/*
 * Substitute a printable marker for a component that parse_url() left
 * as a null pointer; passing a null pointer to printf's %s is undefined.
 */
static const char *url_test_str(const char *s)
{
    return s ? s : "(null)";
}

/*
 * Test driver (built only when URL_TEST is defined): parse every
 * command-line argument as a URL and print the resulting components.
 * Note that parse_url() modifies the argument strings in place.
 */
int main(int argc, char *argv[])
{
    int i;
    struct url_info url;

    for (i = 1; i < argc; i++) {
	parse_url(&url, argv[i]);
	/*
	 * port and type are cast so that the arguments match %d whatever
	 * integer types url.h declares for those fields.
	 */
	printf("scheme:  %s\n"
	       "user:    %s\n"
	       "passwd:  %s\n"
	       "host:    %s\n"
	       "port:    %d\n"
	       "path:    %s\n"
	       "type:    %d\n",
	       url_test_str(url.scheme), url_test_str(url.user),
	       url_test_str(url.passwd), url_test_str(url.host),
	       (int)url.port, url_test_str(url.path), (int)url.type);
    }

    return 0;
}

#endif