#include "XMLHandler.h"

#include <algorithm>
#include <expat.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

// Separator character handed to XML_ParserCreateNS; expat places it between
// the namespace URI and the local name of every element/attribute it reports.
#define NS_SEPARATOR 1
// One level of indentation used when pretty-printing.
#define MORE_INDENT " "

// Escapes the characters that may not appear literally in XML character
// data ('<', '>' and '&') with their predefined entities.
static string
xml_text_escape(const string& s)
{
    string result;
    const size_t N = s.length();
    for (size_t i=0; i<N; i++) {
        char c = s[i];
        switch (c) {
            case '<':
                result += "&lt;";
                break;
            case '>':
                result += "&gt;";
                break;
            case '&':
                result += "&amp;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}

// Escapes a string so it can be written inside a double-quoted attribute
// value.  The quote itself must be escaped, and '&' and '<' are not allowed
// to appear literally inside an attribute value either.
static string
xml_attr_escape(const string& s)
{
    string result;
    const size_t N = s.length();
    for (size_t i=0; i<N; i++) {
        char c = s[i];
        switch (c) {
            case '\"':
                result += "&quot;";
                break;
            case '&':
                result += "&amp;";
                break;
            case '<':
                result += "&lt;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}

// Creates an empty namespace map.
XMLNamespaceMap::XMLNamespaceMap()
{
}

// Builds the map from a NULL-terminated array of string pairs.  For each
// pair, the second string becomes the key and the first string the value.
XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces)
{
    while (*nspaces) {
        m_map[nspaces[1]] = nspaces[0];
        nspaces += 2;
    }
}

// Returns the value stored for ns.  "xml" is returned unchanged without
// consulting the map; an unknown key yields the empty string.
string
XMLNamespaceMap::Get(const string& ns) const
{
    if (ns == "xml") {
        return ns;
    }
    map<string,string>::const_iterator it = m_map.find(ns);
    if (it == m_map.end()) {
        return "";
    } else {
        return it->second;
    }
}

// Returns the text to put in front of a name that lives in namespace ns:
//   - ""           when ns is empty or is mapped to an empty prefix,
//   - "<prefix>:"  when ns is mapped to a non-empty prefix,
//   - ":"          when ns is not in the map (this is not a valid prefix).
string
XMLNamespaceMap::GetPrefix(const string& ns) const
{
    if (ns == "") {
        return "";
    }
    map<string,string>::const_iterator it = m_map.find(ns);
    if (it != m_map.end()) {
        if (it->second == "") {
            return "";
        } else {
            return it->second + ":";
        }
    } else {
        return ":"; // invalid
    }
}

// Appends one xmlns / xmlns:<prefix> attribute to attrs for every entry in
// the map, skipping entries whose prefix is "xml".  The attribute value is
// the map key; the attribute's ns field is left default-constructed.
void
XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const
{
    map<string,string>::const_iterator it;
    for (it=m_map.begin(); it!=m_map.end(); ++it) {
        if (it->second == "xml") {
            continue;
        }
        XMLAttribute attr;
        if (it->second == "") {
            attr.name = "xmlns";
        } else {
            attr.name = "xmlns:";
            attr.name += it->second;
        }
        attr.value = it->first;
        attrs->push_back(attr);
    }
}

// Creates an attribute with empty namespace, name and value.
XMLAttribute::XMLAttribute()
{
}

// Copies namespace, name and value from that.
XMLAttribute::XMLAttribute(const XMLAttribute& that)
    :ns(that.ns),
     name(that.name),
     value(that.value)
{
}

// Creates an attribute from its namespace (n), name (na) and value (v).
XMLAttribute::XMLAttribute(string n, string na, string v)
    :ns(n),
     name(na),
     value(v)
{
}
XMLAttribute::~XMLAttribute() { } int XMLAttribute::Compare(const XMLAttribute& that) const { if (ns != that.ns) { return ns < that.ns ? -1 : 1; } if (name != that.name) { return name < that.name ? -1 : 1; } return 0; } string XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name, const string& def) { const size_t N = list.size(); for (size_t i=0; i<N; i++) { const XMLAttribute& attr = list[i]; if (attr.ns == ns && attr.name == name) { return attr.value; } } return def; } struct xml_handler_data { vector<XMLHandler*> stack; XML_Parser parser; vector<vector<XMLAttribute>*> attributes; string filename; }; XMLNode::XMLNode() { } XMLNode::~XMLNode() { // for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>); } XMLNode* XMLNode::Clone() const { switch (m_type) { case ELEMENT: { XMLNode* e = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty); const size_t N = m_children.size(); for (size_t i=0; i<N; i++) { e->m_children.push_back(m_children[i]->Clone()); } return e; } case TEXT: { return XMLNode::NewText(m_pos, m_text, m_pretty); } default: return NULL; } } XMLNode* XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name, const vector<XMLAttribute>& attrs, int pretty) { XMLNode* node = new XMLNode(); node->m_type = ELEMENT; node->m_pretty = pretty; node->m_pos = pos; node->m_ns = ns; node->m_name = name; node->m_attrs = attrs; return node; } XMLNode* XMLNode::NewText(const SourcePos& pos, const string& text, int pretty) { XMLNode* node = new XMLNode(); node->m_type = TEXT; node->m_pretty = pretty; node->m_pos = pos; node->m_text = text; return node; } void XMLNode::SetPrettyRecursive(int value) { m_pretty = value; const size_t N = m_children.size(); for (size_t i=0; i<N; i++) { m_children[i]->SetPrettyRecursive(value); } } string XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const { return contents_to_string(nspaces, ""); } string XMLNode::ToString(const XMLNamespaceMap& 
nspaces) const { return to_string(nspaces, ""); } string XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const { return open_tag_to_string(nspaces, "", pretty); } string XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const { string result; const size_t N = m_children.size(); for (size_t i=0; i<N; i++) { const XMLNode* child = m_children[i]; switch (child->Type()) { case ELEMENT: if (m_pretty == PRETTY) { result += '\n'; result += indent; } case TEXT: result += child->to_string(nspaces, indent); break; } } return result; } string trim_string(const string& str) { const char* p = str.c_str(); while (*p && isspace(*p)) { p++; } const char* q = str.c_str() + str.length() - 1; while (q > p && isspace(*q)) { q--; } q++; return string(p, q-p); } string XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const { if (m_type != ELEMENT) { return ""; } string result = "<"; result += nspaces.GetPrefix(m_ns); result += m_name; vector<XMLAttribute> attrs = m_attrs; sort(attrs.begin(), attrs.end()); const size_t N = attrs.size(); for (size_t i=0; i<N; i++) { const XMLAttribute& attr = attrs[i]; if (i == 0 || m_pretty == EXACT || pretty == EXACT) { result += ' '; } else { result += "\n"; result += indent; result += MORE_INDENT; result += MORE_INDENT; } result += nspaces.GetPrefix(attr.ns); result += attr.name; result += "=\""; result += xml_attr_escape(attr.value); result += '\"'; } if (m_children.size() > 0) { result += '>'; } else { result += " />"; } return result; } string XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const { switch (m_type) { case TEXT: { if (m_pretty == EXACT) { return xml_text_escape(m_text); } else { return xml_text_escape(trim_string(m_text)); } } case ELEMENT: { string result = open_tag_to_string(nspaces, indent, PRETTY); if (m_children.size() > 0) { result += contents_to_string(nspaces, indent + MORE_INDENT); if (m_pretty == PRETTY 
&& m_children.size() > 0) { result += '\n'; result += indent; } result += "</"; result += nspaces.GetPrefix(m_ns); result += m_name; result += '>'; } return result; } default: return ""; } } string XMLNode::CollapseTextContents() const { if (m_type == TEXT) { return m_text; } else if (m_type == ELEMENT) { string result; const size_t N=m_children.size(); for (size_t i=0; i<N; i++) { result += m_children[i]->CollapseTextContents(); } return result; } else { return ""; } } vector<XMLNode*> XMLNode::GetElementsByName(const string& ns, const string& name) const { vector<XMLNode*> result; const size_t N=m_children.size(); for (size_t i=0; i<N; i++) { XMLNode* child = m_children[i]; if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { result.push_back(child); } } return result; } XMLNode* XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const { vector<XMLNode*> result; const size_t N=m_children.size(); for (size_t i=0; i<N; i++) { XMLNode* child = m_children[i]; if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { if (index == 0) { return child; } else { index--; } } } return NULL; } size_t XMLNode::CountElementsByName(const string& ns, const string& name) const { size_t result = 0; const size_t N=m_children.size(); for (size_t i=0; i<N; i++) { XMLNode* child = m_children[i]; if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { result++; } } return result; } string XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const { return XMLAttribute::Find(m_attrs, ns, name, def); } static void parse_namespace(const char* data, string* ns, string* name) { const char* p = strchr(data, NS_SEPARATOR); if (p != NULL) { ns->assign(data, p-data); name->assign(p+1); } else { ns->assign(""); name->assign(data); } } static void convert_attrs(const char** in, vector<XMLAttribute>* out) { while (*in) { XMLAttribute attr; parse_namespace(in[0], &attr.ns, 
&attr.name); attr.value = in[1]; out->push_back(attr); in += 2; } } static bool list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler) { const size_t N = stack.size(); for (size_t i=0; i<N; i++) { if (stack[i] == handler) { return true; } } return false; } static void XMLCALL start_element_handler(void *userData, const char *name, const char **attrs) { xml_handler_data* data = (xml_handler_data*)userData; XMLHandler* handler = data->stack[data->stack.size()-1]; SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); string nsString; string nameString; XMLHandler* next = handler; vector<XMLAttribute> attributes; parse_namespace(name, &nsString, &nameString); convert_attrs(attrs, &attributes); handler->OnStartElement(pos, nsString, nameString, attributes, &next); if (next == NULL) { next = handler; } if (next != handler) { next->elementPos = pos; next->elementNamespace = nsString; next->elementName = nameString; next->elementAttributes = attributes; } data->stack.push_back(next); } static void XMLCALL end_element_handler(void *userData, const char *name) { xml_handler_data* data = (xml_handler_data*)userData; XMLHandler* handler = data->stack[data->stack.size()-1]; data->stack.pop_back(); SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); if (!list_contains(data->stack, handler)) { handler->OnDone(pos); if (data->stack.size() > 1) { // not top one delete handler; } } handler = data->stack[data->stack.size()-1]; string nsString; string nameString; parse_namespace(name, &nsString, &nameString); handler->OnEndElement(pos, nsString, nameString); } static void XMLCALL text_handler(void *userData, const XML_Char *s, int len) { xml_handler_data* data = (xml_handler_data*)userData; XMLHandler* handler = data->stack[data->stack.size()-1]; SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); handler->OnText(pos, string(s, len)); } static void XMLCALL comment_handler(void *userData, const char 
*comment) { xml_handler_data* data = (xml_handler_data*)userData; XMLHandler* handler = data->stack[data->stack.size()-1]; SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); handler->OnComment(pos, string(comment)); } bool XMLHandler::ParseFile(const string& filename, XMLHandler* handler) { char buf[16384]; int fd = open(filename.c_str(), O_RDONLY); if (fd < 0) { SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno)); return false; } XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR); xml_handler_data state; state.stack.push_back(handler); state.parser = parser; state.filename = filename; XML_SetUserData(parser, &state); XML_SetElementHandler(parser, start_element_handler, end_element_handler); XML_SetCharacterDataHandler(parser, text_handler); XML_SetCommentHandler(parser, comment_handler); ssize_t len; bool done; do { len = read(fd, buf, sizeof(buf)); done = len < (ssize_t)sizeof(buf); if (len < 0) { SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno)); close(fd); return false; } if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) { SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error( "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser))); close(fd); return false; } } while (!done); XML_ParserFree(parser); close(fd); return true; } bool XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler) { XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR); xml_handler_data state; state.stack.push_back(handler); state.parser = parser; state.filename = filename; XML_SetUserData(parser, &state); XML_SetElementHandler(parser, start_element_handler, end_element_handler); XML_SetCharacterDataHandler(parser, text_handler); XML_SetCommentHandler(parser, comment_handler); if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) { SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error( "Error parsing 
XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser))); return false; } XML_ParserFree(parser); return true; } XMLHandler::XMLHandler() { } XMLHandler::~XMLHandler() { } int XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, const vector<XMLAttribute>& attrs, XMLHandler** next) { return 0; } int XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name) { return 0; } int XMLHandler::OnText(const SourcePos& pos, const string& text) { return 0; } int XMLHandler::OnComment(const SourcePos& pos, const string& text) { return 0; } int XMLHandler::OnDone(const SourcePos& pos) { return 0; } TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next) :m_ns(ns), m_name(name), m_next(next) { } int TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, const vector<XMLAttribute>& attrs, XMLHandler** next) { *next = m_next; return 0; } int TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name) { return 0; } int TopElementHandler::OnText(const SourcePos& pos, const string& text) { return 0; } int TopElementHandler::OnDone(const SourcePos& pos) { return 0; } NodeHandler::NodeHandler(XMLNode* root, int pretty) :m_root(root), m_pretty(pretty) { if (root != NULL) { m_nodes.push_back(root); } } NodeHandler::~NodeHandler() { } int NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, const vector<XMLAttribute>& attrs, XMLHandler** next) { int pretty; if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") { pretty = XMLNode::EXACT; } else { if (m_root == NULL) { pretty = m_pretty; } else { pretty = m_nodes[m_nodes.size()-1]->Pretty(); } } XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty); if (m_root == NULL) { m_root = n; } else { m_nodes[m_nodes.size()-1]->EditChildren().push_back(n); } m_nodes.push_back(n); return 0; } int NodeHandler::OnEndElement(const 
SourcePos& pos, const string& ns, const string& name) { m_nodes.pop_back(); return 0; } int NodeHandler::OnText(const SourcePos& pos, const string& text) { if (m_root == NULL) { return 1; } XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty()); m_nodes[m_nodes.size()-1]->EditChildren().push_back(n); return 0; } int NodeHandler::OnComment(const SourcePos& pos, const string& text) { return 0; } int NodeHandler::OnDone(const SourcePos& pos) { return 0; } XMLNode* NodeHandler::ParseFile(const string& filename, int pretty) { NodeHandler handler(NULL, pretty); if (!XMLHandler::ParseFile(filename, &handler)) { fprintf(stderr, "error parsing file: %s\n", filename.c_str()); return NULL; } return handler.Root(); } XMLNode* NodeHandler::ParseString(const string& filename, const string& text, int pretty) { NodeHandler handler(NULL, pretty); if (!XMLHandler::ParseString(filename, text, &handler)) { fprintf(stderr, "error parsing file: %s\n", filename.c_str()); return NULL; } return handler.Root(); }