/* ----------------------------------------------------------------------- *
*
* Copyright 2011 Intel Corporation; author: H. Peter Anvin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston MA 02110-1301, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
#include <inttypes.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <dprintf.h>
#include "pxe.h"
/*
 * States of the scanner that looks for href attributes inside <a> tags.
 * The numeric value of each state is used directly as the row index into
 * statemachine[], so the numbering is spelled out explicitly here.
 */
enum http_readdir_state {
    st_start     = 0,   /* Outside of any tag */
    st_open      = 1,   /* "<" */
    st_a         = 2,   /* "<a" */
    st_attribute = 3,   /* "<a " - at the start of an attribute */
    st_h         = 4,   /* "<a h" */
    st_hr        = 5,   /* "<a hr" */
    st_hre       = 6,   /* "<a hre" */
    st_href      = 7,   /* "<a href" */
    st_hrefeq    = 8,   /* "<a href=" - unquoted value */
    st_hrefqu    = 9,   /* "<a href=\"" - quoted value */
    st_badtag    = 10,  /* Inside a tag that is not <a> */
    st_badtagqu  = 11,  /* Quoted string inside such a tag */
    st_badattr   = 12,  /* Inside an <a> attribute other than href */
    st_badattrqu = 13,  /* Quoted string inside such an attribute */
};
/*
 * One row of the href-scanning state machine.  Rows are indexed by the
 * current enum http_readdir_state value; every st_* field holds the state
 * to move to for one class of input character.  The field order must
 * match the positional initializers in statemachine[].
 */
struct machine {
char xchar; /* state-specific character, tested first against the lowercased input */
uint8_t st_xchar; /* next state when the input matches xchar */
uint8_t st_left; /* < */
uint8_t st_right; /* > */
uint8_t st_space; /* white */
uint8_t st_other; /* anything else */
};
/*
 * Transition table for the href scanner, one row per state.  Each row is
 * keyed by its enum constant so the table cannot drift out of step with
 * enum http_readdir_state.
 *
 * Column order: xchar, st_xchar, st_left ('<'), st_right ('>'),
 *               st_space (whitespace), st_other (anything else).
 *
 * NOTE(review): in st_a a '>' leads to st_open rather than st_start; this
 * looks unintentional but is preserved here - confirm before changing.
 */
static const struct machine statemachine[] = {
    [st_start]     = { 0,   0,            st_open,      st_start,     st_start,     st_start     },
    [st_open]      = { 'a', st_a,         st_badtag,    st_start,     st_open,      st_badtag    },
    [st_a]         = { 0,   0,            st_open,      st_open,      st_attribute, st_badtag    },
    [st_attribute] = { 'h', st_h,         st_open,      st_start,     st_attribute, st_badattr   },
    [st_h]         = { 'r', st_hr,        st_open,      st_start,     st_attribute, st_badattr   },
    [st_hr]        = { 'e', st_hre,       st_open,      st_start,     st_attribute, st_badattr   },
    [st_hre]       = { 'f', st_href,      st_open,      st_start,     st_attribute, st_badattr   },
    [st_href]      = { '=', st_hrefeq,    st_open,      st_start,     st_attribute, st_badattr   },
    [st_hrefeq]    = { '"', st_hrefqu,    st_open,      st_start,     st_attribute, st_hrefeq    },
    [st_hrefqu]    = { '"', st_attribute, st_hrefqu,    st_hrefqu,    st_hrefqu,    st_hrefqu    },
    [st_badtag]    = { '"', st_badtagqu,  st_open,      st_start,     st_badtag,    st_badtag    },
    [st_badtagqu]  = { '"', st_badtag,    st_badtagqu,  st_badtagqu,  st_badtagqu,  st_badtagqu  },
    [st_badattr]   = { '"', st_badattrqu, st_open,      st_start,     st_attribute, st_badattr   },
    [st_badattrqu] = { '"', st_attribute, st_badattrqu, st_badattrqu, st_badattrqu, st_badattrqu },
};
/* Maps one named HTML character entity to its code point. */
struct html_entity {
uint16_t ucs; /* code point; 0 marks the terminating entry of entities[] */
const char entity[9]; /* entity name without the surrounding '&' and ';' */
};
/*
 * Named entities recognised by emit().  The first four are always
 * available; the remainder of the list is only compiled in when
 * HTTP_ALL_ENTITIES is defined.  The table is searched linearly and is
 * terminated by an entry whose ucs is 0.
 */
static const struct html_entity entities[] = {
{ 34, "quot" },
{ 38, "amp" },
{ 60, "lt" },
{ 62, "gt" },
#ifdef HTTP_ALL_ENTITIES
{ 160, "nbsp" },
{ 161, "iexcl" },
{ 162, "cent" },
{ 163, "pound" },
{ 164, "curren" },
{ 165, "yen" },
{ 166, "brvbar" },
{ 167, "sect" },
{ 168, "uml" },
{ 169, "copy" },
{ 170, "ordf" },
{ 171, "laquo" },
{ 172, "not" },
{ 173, "shy" },
{ 174, "reg" },
{ 175, "macr" },
{ 176, "deg" },
{ 177, "plusmn" },
{ 178, "sup2" },
{ 179, "sup3" },
{ 180, "acute" },
{ 181, "micro" },
{ 182, "para" },
{ 183, "middot" },
{ 184, "cedil" },
{ 185, "sup1" },
{ 186, "ordm" },
{ 187, "raquo" },
{ 188, "frac14" },
{ 189, "frac12" },
{ 190, "frac34" },
{ 191, "iquest" },
{ 192, "Agrave" },
{ 193, "Aacute" },
{ 194, "Acirc" },
{ 195, "Atilde" },
{ 196, "Auml" },
{ 197, "Aring" },
{ 198, "AElig" },
{ 199, "Ccedil" },
{ 200, "Egrave" },
{ 201, "Eacute" },
{ 202, "Ecirc" },
{ 203, "Euml" },
{ 204, "Igrave" },
{ 205, "Iacute" },
{ 206, "Icirc" },
{ 207, "Iuml" },
{ 208, "ETH" },
{ 209, "Ntilde" },
{ 210, "Ograve" },
{ 211, "Oacute" },
{ 212, "Ocirc" },
{ 213, "Otilde" },
{ 214, "Ouml" },
{ 215, "times" },
{ 216, "Oslash" },
{ 217, "Ugrave" },
{ 218, "Uacute" },
{ 219, "Ucirc" },
{ 220, "Uuml" },
{ 221, "Yacute" },
{ 222, "THORN" },
{ 223, "szlig" },
{ 224, "agrave" },
{ 225, "aacute" },
{ 226, "acirc" },
{ 227, "atilde" },
{ 228, "auml" },
{ 229, "aring" },
{ 230, "aelig" },
{ 231, "ccedil" },
{ 232, "egrave" },
{ 233, "eacute" },
{ 234, "ecirc" },
{ 235, "euml" },
{ 236, "igrave" },
{ 237, "iacute" },
{ 238, "icirc" },
{ 239, "iuml" },
{ 240, "eth" },
{ 241, "ntilde" },
{ 242, "ograve" },
{ 243, "oacute" },
{ 244, "ocirc" },
{ 245, "otilde" },
{ 246, "ouml" },
{ 247, "divide" },
{ 248, "oslash" },
{ 249, "ugrave" },
{ 250, "uacute" },
{ 251, "ucirc" },
{ 252, "uuml" },
{ 253, "yacute" },
{ 254, "thorn" },
{ 255, "yuml" },
{ 338, "OElig" },
{ 339, "oelig" },
{ 352, "Scaron" },
{ 353, "scaron" },
{ 376, "Yuml" },
{ 402, "fnof" },
{ 710, "circ" },
{ 732, "tilde" },
{ 913, "Alpha" },
{ 914, "Beta" },
{ 915, "Gamma" },
{ 916, "Delta" },
{ 917, "Epsilon" },
{ 918, "Zeta" },
{ 919, "Eta" },
{ 920, "Theta" },
{ 921, "Iota" },
{ 922, "Kappa" },
{ 923, "Lambda" },
{ 924, "Mu" },
{ 925, "Nu" },
{ 926, "Xi" },
{ 927, "Omicron" },
{ 928, "Pi" },
{ 929, "Rho" },
{ 931, "Sigma" },
{ 932, "Tau" },
{ 933, "Upsilon" },
{ 934, "Phi" },
{ 935, "Chi" },
{ 936, "Psi" },
{ 937, "Omega" },
{ 945, "alpha" },
{ 946, "beta" },
{ 947, "gamma" },
{ 948, "delta" },
{ 949, "epsilon" },
{ 950, "zeta" },
{ 951, "eta" },
{ 952, "theta" },
{ 953, "iota" },
{ 954, "kappa" },
{ 955, "lambda" },
{ 956, "mu" },
{ 957, "nu" },
{ 958, "xi" },
{ 959, "omicron" },
{ 960, "pi" },
{ 961, "rho" },
{ 962, "sigmaf" },
{ 963, "sigma" },
{ 964, "tau" },
{ 965, "upsilon" },
{ 966, "phi" },
{ 967, "chi" },
{ 968, "psi" },
{ 969, "omega" },
{ 977, "thetasym" },
{ 978, "upsih" },
{ 982, "piv" },
{ 8194, "ensp" },
{ 8195, "emsp" },
{ 8201, "thinsp" },
{ 8204, "zwnj" },
{ 8205, "zwj" },
{ 8206, "lrm" },
{ 8207, "rlm" },
{ 8211, "ndash" },
{ 8212, "mdash" },
{ 8216, "lsquo" },
{ 8217, "rsquo" },
{ 8218, "sbquo" },
{ 8220, "ldquo" },
{ 8221, "rdquo" },
{ 8222, "bdquo" },
{ 8224, "dagger" },
{ 8225, "Dagger" },
{ 8226, "bull" },
{ 8230, "hellip" },
{ 8240, "permil" },
{ 8242, "prime" },
{ 8243, "Prime" },
{ 8249, "lsaquo" },
{ 8250, "rsaquo" },
{ 8254, "oline" },
{ 8260, "frasl" },
{ 8364, "euro" },
{ 8465, "image" },
{ 8472, "weierp" },
{ 8476, "real" },
{ 8482, "trade" },
{ 8501, "alefsym" },
{ 8592, "larr" },
{ 8593, "uarr" },
{ 8594, "rarr" },
{ 8595, "darr" },
{ 8596, "harr" },
{ 8629, "crarr" },
{ 8656, "lArr" },
{ 8657, "uArr" },
{ 8658, "rArr" },
{ 8659, "dArr" },
{ 8660, "hArr" },
{ 8704, "forall" },
{ 8706, "part" },
{ 8707, "exist" },
{ 8709, "empty" },
{ 8711, "nabla" },
{ 8712, "isin" },
{ 8713, "notin" },
{ 8715, "ni" },
{ 8719, "prod" },
{ 8721, "sum" },
{ 8722, "minus" },
{ 8727, "lowast" },
{ 8730, "radic" },
{ 8733, "prop" },
{ 8734, "infin" },
{ 8736, "ang" },
{ 8743, "and" },
{ 8744, "or" },
{ 8745, "cap" },
{ 8746, "cup" },
{ 8747, "int" },
{ 8756, "there4" },
{ 8764, "sim" },
{ 8773, "cong" },
{ 8776, "asymp" },
{ 8800, "ne" },
{ 8801, "equiv" },
{ 8804, "le" },
{ 8805, "ge" },
{ 8834, "sub" },
{ 8835, "sup" },
{ 8836, "nsub" },
{ 8838, "sube" },
{ 8839, "supe" },
{ 8853, "oplus" },
{ 8855, "otimes" },
{ 8869, "perp" },
{ 8901, "sdot" },
{ 8968, "lceil" },
{ 8969, "rceil" },
{ 8970, "lfloor" },
{ 8971, "rfloor" },
{ 9001, "lang" },
{ 9002, "rang" },
{ 9674, "loz" },
{ 9824, "spades" },
{ 9827, "clubs" },
{ 9829, "hearts" },
{ 9830, "diams" },
#endif /* HTTP_ALL_ENTITIES */
{ 0, "" }
};
/* Decoder state for an HTML entity that emit() is in the middle of reading. */
struct entity_state {
char entity_buf[16]; /* characters collected since the opening '&' */
char *ep; /* next free byte in entity_buf, or NULL when not inside an entity */
};
/*
 * Append one character of an href value to the output, decoding HTML
 * character entities ("&name;", "&#NNN;", "&#xHHH;") on the fly.
 *
 *   p  - current write position in the output buffer
 *   c  - character just read from the attribute value
 *   st - decoder state; st->ep is NULL outside an entity and otherwise
 *        points at the next free byte of st->entity_buf
 *
 * Returns the updated write position.  A single call stores at most four
 * bytes (one UTF-8 sequence), so the caller must leave that much room.
 * Unknown, malformed or out-of-range entities produce no output.
 */
static char *emit(char *p, int c, struct entity_state *st)
{
    const struct html_entity *ent;
    unsigned long ucs;  /* full strtoul() width: no silent truncation */

    if (!st->ep) {
        if (c == '&')
            st->ep = st->entity_buf;    /* Entity open */
        else
            *p++ = c;
        return p;
    }

    if (c != ';') {
        /* Collect the entity text, always keeping one byte for the NUL */
        if (st->ep < st->entity_buf + sizeof st->entity_buf - 1)
            *st->ep++ = c;
        return p;
    }

    /*
     * Entity close.  Terminate the collected text itself: the buffer is
     * reused for every entity, so relying on earlier zero bytes would
     * leave the tail of a previous, longer entity attached to this one.
     */
    *st->ep = '\0';
    st->ep = NULL;

    if (st->entity_buf[0] == '#') {
        if ((st->entity_buf[1] | 0x20) == 'x')
            ucs = strtoul(st->entity_buf + 2, NULL, 16);
        else
            ucs = strtoul(st->entity_buf + 1, NULL, 10);
    } else {
        for (ent = entities; ent->ucs; ent++) {
            if (!strcmp(st->entity_buf, ent->entity))
                break;
        }
        ucs = ent->ucs;     /* 0 (the terminating entry) if no match */
    }

    if (ucs < 32 || ucs >= 0x10ffff)
        return p;           /* Bogus */
    if (ucs >= 0xd800 && ucs <= 0xdfff)
        return p;           /* Surrogates have no valid UTF-8 encoding */

    /* Encode the code point as UTF-8 */
    if (ucs >= 0x10000) {
        *p++ = (char)(0xf0 + (ucs >> 18));
        *p++ = (char)(0x80 + ((ucs >> 12) & 0x3f));
        *p++ = (char)(0x80 + ((ucs >> 6) & 0x3f));
        *p++ = (char)(0x80 + (ucs & 0x3f));
    } else if (ucs >= 0x800) {
        *p++ = (char)(0xe0 + (ucs >> 12));
        *p++ = (char)(0x80 + ((ucs >> 6) & 0x3f));
        *p++ = (char)(0x80 + (ucs & 0x3f));
    } else if (ucs >= 0x80) {
        *p++ = (char)(0xc0 + (ucs >> 6));
        *p++ = (char)(0x80 + (ucs & 0x3f));
    } else {
        *p++ = (char)ucs;
    }
    return p;
}
/*
 * Scan the HTML stream of an HTTP directory listing for the next
 * <a href=...> value and copy it, entity-decoded, into buf.
 *
 *   inode - stream to read from via pxe_getc()
 *   buf   - output buffer; collection stops once FILENAME_MAX bytes are
 *           stored, but the final emit() may add up to four bytes plus
 *           the terminating NUL, so buf must hold at least
 *           FILENAME_MAX + 5 bytes
 *
 * Returns buf (NUL-terminated) once a non-empty href has been collected
 * and the scanner is back outside of any tag, or NULL when pxe_getc()
 * reports end of input (-1).
 */
static const char *http_get_filename(struct inode *inode, char *buf)
{
    int c, lc;
    char *p;
    const struct machine *sm;
    struct entity_state es;
    enum http_readdir_state state = st_start;
    enum http_readdir_state pstate = st_start;

    memset(&es, 0, sizeof es);
    p = buf;

    for (;;) {
        c = pxe_getc(inode);
        if (c == -1)
            return NULL;

        /* Advance the state machine; the per-state character wins */
        lc = tolower(c);
        sm = &statemachine[state];
        if (lc == sm->xchar)
            state = sm->st_xchar;
        else if (c == '<')
            state = sm->st_left;
        else if (c == '>')
            state = sm->st_right;
        else if (isspace(c))
            state = sm->st_space;
        else
            state = sm->st_other;

        if (state == st_hrefeq || state == st_hrefqu) {
            if (state != pstate) {
                /*
                 * Just entered a (new) href value: the '=' or opening
                 * quote itself is not part of the name.  Start over and
                 * forget any entity left unterminated by an earlier
                 * value, otherwise this value would be swallowed by the
                 * entity buffer instead of being copied out.
                 */
                p = buf;
                es.ep = NULL;
            } else if (p < buf + FILENAME_MAX) {
                p = emit(p, c, &es);
            }
            pstate = state;
        } else {
            pstate = st_start;
            /* Hand the name back once the enclosing tag is finished */
            if (p != buf && state == st_start) {
                *p = '\0';
                return buf;
            }
        }
    }
}
/*
 * Produce the next directory entry from an HTTP directory listing.
 *
 * Link targets are pulled from the listing one at a time with
 * http_get_filename().  Targets containing '#' or '?', targets with a '/'
 * anywhere but in the last position (or in the first), and names longer
 * than NAME_MAX are skipped.  A trailing '/' marks the entry as a
 * directory and is not copied into d_name.
 *
 * Returns 0 with *dirent filled in, or -1 when the listing is exhausted.
 */
int http_readdir(struct inode *inode, struct dirent *dirent)
{
    char namebuf[FILENAME_MAX + 6];
    const char *name, *end;
    size_t len;

    while ((name = http_get_filename(inode, namebuf)) != NULL) {
        /* Ignore entries with http special characters */
        if (strchr(name, '#') || strchr(name, '?'))
            continue;

        /* A slash may only appear as the last, and not the first, character */
        end = strchr(name, '/');
        if (!end)
            end = name + strlen(name);
        else if (end == name || end[1] != '\0')
            continue;

        len = (size_t)(end - name);
        if (len > (size_t)NAME_MAX)
            continue;

        dirent->d_ino = 0;      /* Not applicable */
        dirent->d_off = 0;      /* Not applicable */
        dirent->d_reclen = offsetof(struct dirent, d_name) + len + 1;
        dirent->d_type = (*end == '/') ? DT_DIR : DT_REG;
        memcpy(dirent->d_name, name, len);
        dirent->d_name[len] = '\0';
        return 0;
    }

    return -1;                  /* End of directory */
}