/* ----------------------------------------------------------------------- * * * Copyright 2011 Intel Corporation; author: H. Peter Anvin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston MA 02110-1301, USA; either version 2 of the License, or * (at your option) any later version; incorporated herein by reference. * * ----------------------------------------------------------------------- */ #include <inttypes.h> #include <string.h> #include <stdlib.h> #include <ctype.h> #include <dprintf.h> #include "pxe.h" enum http_readdir_state { st_start, /* 0 Initial state */ st_open, /* 1 "<" */ st_a, /* 2 "<a" */ st_attribute, /* 3 "<a " */ st_h, /* 4 "<a h" */ st_hr, /* 5 */ st_hre, /* 6 */ st_href, /* 7 */ st_hrefeq, /* 8 */ st_hrefqu, /* 9 */ st_badtag, /* 10 */ st_badtagqu, /* 11 */ st_badattr, /* 12 */ st_badattrqu, /* 13 */ }; struct machine { char xchar; uint8_t st_xchar; uint8_t st_left; /* < */ uint8_t st_right; /* > */ uint8_t st_space; /* white */ uint8_t st_other; /* anything else */ }; static const struct machine statemachine[] = { /* xchar st_xchar st_left st_right st_space st_other */ { 0, 0, st_open, st_start, st_start, st_start }, { 'a', st_a, st_badtag, st_start, st_open, st_badtag }, { 0, 0, st_open, st_open, st_attribute, st_badtag }, { 'h', st_h, st_open, st_start, st_attribute, st_badattr }, { 'r', st_hr, st_open, st_start, st_attribute, st_badattr }, { 'e', st_hre, st_open, st_start, st_attribute, st_badattr }, { 'f', st_href, st_open, st_start, st_attribute, st_badattr }, { '=', st_hrefeq, st_open, st_start, st_attribute, st_badattr }, { '\"', st_hrefqu, st_open, st_start, st_attribute, st_hrefeq }, { '\"', st_attribute, st_hrefqu, st_hrefqu, st_hrefqu, st_hrefqu }, { '\"', st_badtagqu, st_open, st_start, st_badtag, st_badtag }, { '\"', st_badtag, st_badtagqu, st_badtagqu, st_badtagqu, st_badtagqu }, { '\"', st_badattrqu, st_open, st_start, st_attribute, st_badattr }, { '\"', st_attribute, st_badattrqu, st_badattrqu, st_badattrqu, st_badattrqu }, }; struct html_entity { uint16_t ucs; const char entity[9]; }; static const struct html_entity entities[] = { { 34, "quot" }, { 38, "amp" }, { 60, "lt" }, { 62, "gt" }, #ifdef HTTP_ALL_ENTITIES { 160, "nbsp" }, { 161, "iexcl" }, { 162, "cent" }, { 163, "pound" }, { 164, "curren" }, { 165, "yen" }, { 166, "brvbar" }, { 167, "sect" }, { 168, "uml" }, { 169, "copy" }, { 170, "ordf" }, { 171, "laquo" }, { 172, "not" }, { 173, "shy" }, { 174, "reg" }, { 175, "macr" }, { 176, "deg" }, { 177, "plusmn" }, { 178, "sup2" }, { 179, "sup3" }, { 180, "acute" }, { 181, "micro" }, { 182, "para" }, { 183, "middot" }, { 184, "cedil" }, { 185, "sup1" }, { 186, "ordm" }, { 187, "raquo" }, { 188, "frac14" }, { 189, "frac12" }, { 190, "frac34" }, { 191, "iquest" }, { 192, "Agrave" }, { 193, "Aacute" }, { 194, "Acirc" }, { 195, "Atilde" }, { 196, "Auml" }, { 197, "Aring" }, { 198, "AElig" }, { 199, "Ccedil" }, { 200, "Egrave" }, { 201, "Eacute" }, { 202, "Ecirc" }, { 203, "Euml" }, { 204, "Igrave" }, { 205, "Iacute" }, { 206, "Icirc" }, { 207, "Iuml" }, { 208, "ETH" }, { 209, "Ntilde" }, { 210, "Ograve" }, { 211, "Oacute" }, { 212, "Ocirc" }, { 213, "Otilde" }, { 214, "Ouml" }, { 215, "times" }, { 216, "Oslash" }, { 217, "Ugrave" }, { 218, "Uacute" }, { 219, "Ucirc" }, { 220, "Uuml" }, { 221, "Yacute" }, { 222, "THORN" }, { 223, "szlig" }, { 224, "agrave" }, { 225, "aacute" }, { 226, "acirc" }, { 227, "atilde" }, { 228, "auml" }, { 229, "aring" }, { 230, "aelig" }, { 231, "ccedil" }, { 232, "egrave" }, { 233, "eacute" }, { 234, "ecirc" }, { 235, "euml" }, { 236, "igrave" }, { 237, "iacute" }, { 238, "icirc" }, { 239, "iuml" }, { 240, "eth" }, { 241, "ntilde" }, { 242, "ograve" }, { 243, "oacute" }, { 244, "ocirc" }, { 245, "otilde" }, { 246, "ouml" }, { 247, "divide" }, { 248, "oslash" }, { 249, "ugrave" }, { 250, "uacute" }, { 251, "ucirc" }, { 252, "uuml" }, { 253, "yacute" }, { 254, "thorn" }, { 255, "yuml" }, { 338, "OElig" }, { 339, "oelig" }, { 352, "Scaron" }, { 353, "scaron" }, { 376, "Yuml" }, { 402, "fnof" }, { 710, "circ" }, { 732, "tilde" }, { 913, "Alpha" }, { 914, "Beta" }, { 915, "Gamma" }, { 916, "Delta" }, { 917, "Epsilon" }, { 918, "Zeta" }, { 919, "Eta" }, { 920, "Theta" }, { 921, "Iota" }, { 922, "Kappa" }, { 923, "Lambda" }, { 924, "Mu" }, { 925, "Nu" }, { 926, "Xi" }, { 927, "Omicron" }, { 928, "Pi" }, { 929, "Rho" }, { 931, "Sigma" }, { 932, "Tau" }, { 933, "Upsilon" }, { 934, "Phi" }, { 935, "Chi" }, { 936, "Psi" }, { 937, "Omega" }, { 945, "alpha" }, { 946, "beta" }, { 947, "gamma" }, { 948, "delta" }, { 949, "epsilon" }, { 950, "zeta" }, { 951, "eta" }, { 952, "theta" }, { 953, "iota" }, { 954, "kappa" }, { 955, "lambda" }, { 956, "mu" }, { 957, "nu" }, { 958, "xi" }, { 959, "omicron" }, { 960, "pi" }, { 961, "rho" }, { 962, "sigmaf" }, { 963, "sigma" }, { 964, "tau" }, { 965, "upsilon" }, { 966, "phi" }, { 967, "chi" }, { 968, "psi" }, { 969, "omega" }, { 977, "thetasym" }, { 978, "upsih" }, { 982, "piv" }, { 8194, "ensp" }, { 8195, "emsp" }, { 8201, "thinsp" }, { 8204, "zwnj" }, { 8205, "zwj" }, { 8206, "lrm" }, { 8207, "rlm" }, { 8211, "ndash" }, { 8212, "mdash" }, { 8216, "lsquo" }, { 8217, "rsquo" }, { 8218, "sbquo" }, { 8220, "ldquo" }, { 8221, "rdquo" }, { 8222, "bdquo" }, { 8224, "dagger" }, { 8225, "Dagger" }, { 8226, "bull" }, { 8230, "hellip" }, { 8240, "permil" }, { 8242, "prime" }, { 8243, "Prime" }, { 8249, "lsaquo" }, { 8250, "rsaquo" }, { 8254, "oline" }, { 8260, "frasl" }, { 8364, "euro" }, { 8465, "image" }, { 8472, "weierp" }, { 8476, "real" }, { 8482, "trade" }, { 8501, "alefsym" }, { 8592, "larr" }, { 8593, "uarr" }, { 8594, "rarr" }, { 8595, "darr" }, { 8596, "harr" }, { 8629, "crarr" }, { 8656, "lArr" }, { 8657, "uArr" }, { 8658, "rArr" }, { 8659, "dArr" }, { 8660, "hArr" }, { 8704, "forall" }, { 8706, "part" }, { 8707, "exist" }, { 8709, "empty" }, { 8711, "nabla" }, { 8712, "isin" }, { 8713, "notin" }, { 8715, "ni" }, { 8719, "prod" }, { 8721, "sum" }, { 8722, "minus" }, { 8727, "lowast" }, { 8730, "radic" }, { 8733, "prop" }, { 8734, "infin" }, { 8736, "ang" }, { 8743, "and" }, { 8744, "or" }, { 8745, "cap" }, { 8746, "cup" }, { 8747, "int" }, { 8756, "there4" }, { 8764, "sim" }, { 8773, "cong" }, { 8776, "asymp" }, { 8800, "ne" }, { 8801, "equiv" }, { 8804, "le" }, { 8805, "ge" }, { 8834, "sub" }, { 8835, "sup" }, { 8836, "nsub" }, { 8838, "sube" }, { 8839, "supe" }, { 8853, "oplus" }, { 8855, "otimes" }, { 8869, "perp" }, { 8901, "sdot" }, { 8968, "lceil" }, { 8969, "rceil" }, { 8970, "lfloor" }, { 8971, "rfloor" }, { 9001, "lang" }, { 9002, "rang" }, { 9674, "loz" }, { 9824, "spades" }, { 9827, "clubs" }, { 9829, "hearts" }, { 9830, "diams" }, #endif /* HTTP_ALL_ENTITIES */ { 0, "" } }; struct entity_state { char entity_buf[16]; char *ep; }; static char *emit(char *p, int c, struct entity_state *st) { const struct html_entity *ent; unsigned int ucs; if (!st->ep) { if (c == '&') { /* Entity open */ st->ep = st->entity_buf; } else { *p++ = c; } } else { if (c == ';') { st->ep = NULL; *p = '\0'; if (st->entity_buf[0] == '#') { if ((st->entity_buf[1] | 0x20)== 'x') { ucs = strtoul(st->entity_buf + 2, NULL, 16); } else { ucs = strtoul(st->entity_buf + 1, NULL, 10); } } else { for (ent = entities; ent->ucs; ent++) { if (!strcmp(st->entity_buf, ent->entity)) break; } ucs = ent->ucs; } if (ucs < 32 || ucs >= 0x10ffff) return p; /* Bogus */ if (ucs >= 0x10000) { *p++ = 0xf0 + (ucs >> 18); *p++ = 0x80 + ((ucs >> 12) & 0x3f); *p++ = 0x80 + ((ucs >> 6) & 0x3f); *p++ = 0x80 + (ucs & 0x3f); } else if (ucs >= 0x800) { *p++ = 0xe0 + (ucs >> 12); *p++ = 0x80 + ((ucs >> 6) & 0x3f); *p++ = 0x80 + (ucs & 0x3f); } else if (ucs >= 0x80) { *p++ = 0xc0 + (ucs >> 6); *p++ = 0x80 + (ucs & 0x3f); } else { *p++ = ucs; } } else if (st->ep < st->entity_buf + sizeof st->entity_buf - 1) { *st->ep++ = c; } } return p; } static const char *http_get_filename(struct inode *inode, char *buf) { int c, lc; char *p; const struct machine *sm; struct entity_state es; enum http_readdir_state state = st_start; enum http_readdir_state pstate = st_start; memset(&es, 0, sizeof es); p = buf; for (;;) { c = pxe_getc(inode); if (c == -1) return NULL; lc = tolower(c); sm = &statemachine[state]; if (lc == sm->xchar) state = sm->st_xchar; else if (c == '<') state = sm->st_left; else if (c == '>') state = sm->st_right; else if (isspace(c)) state = sm->st_space; else state = sm->st_other; if (state == st_hrefeq || state == st_hrefqu) { if (state != pstate) p = buf; else if (p < buf + FILENAME_MAX) p = emit(p, c, &es); pstate = state; } else { if (pstate != st_start) pstate = st_start; if (p != buf && state == st_start) { *p = '\0'; return buf; } } } } int http_readdir(struct inode *inode, struct dirent *dirent) { char buf[FILENAME_MAX + 6]; const char *fn, *sp; for (;;) { fn = http_get_filename(inode, buf); if (!fn) return -1; /* End of directory */ /* Ignore entries with http special characters */ if (strchr(fn, '#')) continue; if (strchr(fn, '?')) continue; /* A slash if present has to be the last character, and not the first */ sp = strchr(fn, '/'); if (sp) { if (sp == fn || sp[1]) continue; } else { sp = strchr(fn, '\0'); } if (sp > fn + NAME_MAX) continue; dirent->d_ino = 0; /* Not applicable */ dirent->d_off = 0; /* Not applicable */ dirent->d_reclen = offsetof(struct dirent, d_name) + (sp-fn) + 1; dirent->d_type = *sp == '/' ? DT_DIR : DT_REG; memcpy(dirent->d_name, fn, sp-fn); dirent->d_name[sp-fn] = '\0'; return 0; } }