/* ----------------------------------------------------------------------- *
*
* Copyright 2009-2011 Intel Corporation; author: H. Peter Anvin
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall
* be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* ----------------------------------------------------------------------- */
/*
* urlparse.c
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "url.h"
/*
 * Classify a URL string without touching it.
 *
 * Only the first ':' in the string is examined:
 *   - followed by "//"       -> URL_NORMAL   (scheme://...)
 *   - followed by another ':' -> URL_OLD_TFTP (host::path)
 *   - anything else, or no ':' at all -> URL_SUFFIX
 */
enum url_type url_type(const char *url)
{
    const char *colon = strchr(url, ':');

    if (colon != NULL) {
        /* The two tests are mutually exclusive on colon[1]. */
        if (colon[1] == ':')
            return URL_OLD_TFTP;
        if (colon[1] == '/' && colon[2] == '/')
            return URL_NORMAL;
    }

    return URL_SUFFIX;
}
/*
 * Decompose a URL into its components.  This is done in-place;
 * this routine does not allocate any additional storage.  Freeing the
 * original buffer frees all storage used.
 *
 * *ui is zeroed first, so every field not mentioned below is left as
 * zero.  Three input forms are recognised, keyed on the first ':':
 *
 *   scheme://[user[:passwd]@]host[:port][/path[#fragment]]
 *       type = URL_NORMAL.  scheme, host and path are always set; path
 *       is the text after the first '/' following the "//" (the slash
 *       itself is not included), cut at the first '#', or "" when there
 *       is no such slash.  user/passwd are set when an '@' is present,
 *       port when a ':' follows the host.
 *
 *   host::path
 *       type = URL_OLD_TFTP.  scheme is the literal "tftp", host is the
 *       text before "::" and path the text after it.
 *
 *   anything else
 *       type = URL_SUFFIX.  Only path is set, to the whole string.
 *
 * The separators inside url are overwritten with NUL bytes.  Note that
 * scheme and path may point at string literals ("tftp", "") rather than
 * into url, so callers must not write through or free those pointers.
 */
void parse_url(struct url_info *ui, char *url)
{
    char *p = url;
    char *q, *r, *s;

    memset(ui, 0, sizeof *ui);

    q = strchr(p, ':');
    if (q && q[1] == '/' && q[2] == '/') {
        ui->type = URL_NORMAL;

        ui->scheme = p;
        *q = '\0';
        p = q + 3;              /* skip "://" */

        /* Split off the path first so '@' and ':' in it are ignored. */
        q = strchr(p, '/');
        if (q) {
            *q = '\0';
            ui->path = q + 1;
            q = strchr(q + 1, '#');
            if (q)
                *q = '\0';      /* drop the fragment */
        } else {
            ui->path = "";
        }

        /* Optional user[:passwd]@ prefix. */
        r = strchr(p, '@');
        if (r) {
            ui->user = p;
            *r = '\0';
            s = strchr(p, ':');
            if (s) {
                *s = '\0';
                ui->passwd = s + 1;
            }
            p = r + 1;
        }

        ui->host = p;

        /* Optional :port suffix. */
        r = strchr(p, ':');
        if (r) {
            unsigned int port = 0;

            *r++ = '\0';
            for (; *r; r++) {
                /*
                 * Unsigned subtraction makes characters below '0' wrap
                 * to a large value, so a single comparison rejects
                 * everything that is not a decimal digit.  (Testing
                 * only "> 9" on a signed value would accept them as
                 * negative digits.)
                 */
                unsigned int digit = (unsigned char)*r - (unsigned int)'0';

                if (digit > 9)
                    break;
                port = port * 10 + digit;
            }
            ui->port = port;
        }
    } else if (q && q[1] == ':') {
        *q = '\0';
        ui->scheme = "tftp";
        ui->host = p;
        ui->path = q + 2;       /* skip "::" */
        ui->type = URL_OLD_TFTP;
    } else {
        ui->path = p;
        ui->type = URL_SUFFIX;
    }
}
/*
 * Escapes unsafe characters in a URL.
 * This does *not* escape things like query characters!
 *
 * Every byte that is <= ' ' or > '~' is emitted as %XX (uppercase hex);
 * all other bytes are copied unchanged.  At most bufsize-1 characters
 * are stored, followed by a terminating NUL.  Output beyond that is
 * counted but discarded, so a truncated result may end in the middle
 * of an escape sequence.
 *
 * Returns the number of characters in the total output, not counting
 * the terminating NUL; a return value >= bufsize means the result was
 * truncated.  When bufsize is 0 nothing is stored and output is never
 * dereferenced, so the function can be used to size a buffer.
 */
size_t url_escape_unsafe(char *output, const char *input, size_t bufsize)
{
    static const char uchexchar[] = "0123456789ABCDEF";
    const char *p;
    unsigned char c;
    char *q = output;
    size_t n = 0;

    for (p = input; (c = (unsigned char)*p) != '\0'; p++) {
        if (c <= ' ' || c > '~') {
            if (++n < bufsize)
                *q++ = '%';
            if (++n < bufsize)
                *q++ = uchexchar[c >> 4];
            if (++n < bufsize)
                *q++ = uchexchar[c & 15];
        } else {
            if (++n < bufsize)
                *q++ = (char)c;
        }
    }

    /* A zero-sized buffer has no room even for the terminator. */
    if (bufsize > 0)
        *q = '\0';

    return n;
}
/*
 * Convert one hexadecimal digit character to its value (0-15).
 * Upper and lower case letters are both accepted; any other
 * character yields -1.
 */
static int hexdigit(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    c |= 0x20;                  /* fold ASCII upper case onto lower case */
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return -1;
}

/*
 * Unescapes a buffer, optionally ending at an *unescaped* terminator
 * (like ; for TFTP).  The unescaping is done in-place.
 *
 * Each %XX sequence with two valid hex digits is replaced by the byte
 * it encodes; a '%' not followed by two hex digits is copied through
 * unchanged.  A terminator that was itself written as %XX does not end
 * the scan.
 *
 * If a terminator is reached, the unescaped text is NUL-terminated
 * there and a pointer to the first character after the terminator is
 * returned; the remainder of the buffer is left untouched.  Otherwise
 * the whole string is unescaped and NULL is returned.  Passing '\0' as
 * the terminator always processes the whole string, because the scan
 * stops at the end of the string before the comparison can match.
 */
char *url_unescape(char *buffer, char terminator)
{
    /* Compare as unsigned char so a high-bit terminator can match. */
    const unsigned char term = (unsigned char)terminator;
    char *p = buffer;           /* read position */
    char *q = buffer;           /* write position, never ahead of p */
    unsigned char c;
    int x, y;

    while ((c = (unsigned char)*p) != '\0') {
        if (c == term) {
            *q = '\0';
            /*
             * Step over the terminator: depending on whether earlier
             * escapes shrank the text, p now addresses either the NUL
             * just stored or the terminator itself, neither of which
             * is useful to the caller.
             */
            return p + 1;
        }
        p++;
        if (c == '%') {
            /* p[1] is only read when p[0] is a hex digit, hence not NUL. */
            x = hexdigit(p[0]);
            if (x >= 0) {
                y = hexdigit(p[1]);
                if (y >= 0) {
                    *q++ = (char)((x << 4) + y);
                    p += 2;
                    continue;
                }
            }
        }
        *q++ = (char)c;
    }

    *q = '\0';
    return NULL;
}
#ifdef URL_TEST
/*
 * Return s, or a printable placeholder when s is NULL.  parse_url()
 * leaves unused components as NULL, and passing NULL to a %s
 * conversion is undefined behaviour.
 */
static const char *str_or_null(const char *s)
{
    return s ? s : "(null)";
}

/*
 * Test driver, built only when URL_TEST is defined: parses each
 * command-line argument in place with parse_url() and prints the
 * resulting components, one per line.
 */
int main(int argc, char *argv[])
{
    int i;
    struct url_info url;

    for (i = 1; i < argc; i++) {
        parse_url(&url, argv[i]);
        printf("scheme: %s\n"
               "user: %s\n"
               "passwd: %s\n"
               "host: %s\n"
               "port: %d\n"
               "path: %s\n"
               "type: %d\n",
               str_or_null(url.scheme), str_or_null(url.user),
               str_or_null(url.passwd), str_or_null(url.host), url.port,
               str_or_null(url.path), url.type);
    }

    return 0;
}
#endif