#include "XMLHandler.h"
#include <algorithm>
#include <expat.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
// Character that separates namespace from local name in expat's qualified
// names: it is handed to XML_ParserCreateNS and searched for by parse_namespace.
#define NS_SEPARATOR 1
// One extra level of indentation, appended when pretty-printing nested output.
#define MORE_INDENT " "
/**
 * Escapes a string so it can be written as XML character data.
 *
 * '<', '>' and '&' are replaced by their predefined XML entities; every other
 * character is copied through unchanged.
 *
 * The entity literals are deliberately written as two adjacent string pieces
 * so that tooling which decodes HTML/XML entities inside source text cannot
 * silently turn them back into the raw characters.
 */
static std::string
xml_text_escape(const std::string& s)
{
    std::string result;
    const size_t N = s.length();
    result.reserve(N);
    for (size_t i=0; i<N; i++) {
        const char c = s[i];
        switch (c) {
            case '<':
                result += "&" "lt;";
                break;
            case '>':
                result += "&" "gt;";
                break;
            case '&':
                result += "&" "amp;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
/**
 * Escapes a string so it can be written inside a double-quoted XML attribute
 * value.
 *
 * The double quote is replaced by its entity so it cannot terminate the
 * attribute, and '&' and '<' are replaced as well because neither may appear
 * raw inside an attribute value. All other characters are copied unchanged.
 *
 * The entity literals are deliberately written as two adjacent string pieces
 * so that tooling which decodes HTML/XML entities inside source text cannot
 * silently turn them back into the raw characters.
 */
static std::string
xml_attr_escape(const std::string& s)
{
    std::string result;
    const size_t N = s.length();
    result.reserve(N);
    for (size_t i=0; i<N; i++) {
        const char c = s[i];
        switch (c) {
            case '\"':
                result += "&" "quot;";
                break;
            case '&':
                result += "&" "amp;";
                break;
            case '<':
                result += "&" "lt;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
// Creates a namespace map with no entries.
XMLNamespaceMap::XMLNamespaceMap()
{
}
/**
 * Builds the map from a NULL-terminated array of C-string pairs.
 *
 * For each pair, the second string becomes the map key and the first string
 * the stored value (elsewhere in this file the key is used as the namespace
 * and the value as its prefix).
 */
XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces)
{
    for (char const*const* entry = nspaces; *entry; entry += 2) {
        m_map[entry[1]] = entry[0];
    }
}
/**
 * Looks up the value stored for `ns`.
 *
 * "xml" is returned as-is without consulting the map; an unknown key yields
 * an empty string.
 */
string
XMLNamespaceMap::Get(const string& ns) const
{
    if (ns == "xml") {
        return ns;
    }
    const map<string,string>::const_iterator found = m_map.find(ns);
    return found != m_map.end() ? found->second : string("");
}
/**
 * Returns the text to put in front of a name that lives in namespace `ns`.
 *
 * - empty namespace, or a namespace mapped to the empty prefix: ""
 * - namespace mapped to a prefix: "prefix:"
 * - namespace missing from the map: ":" (marks the output as invalid)
 */
string
XMLNamespaceMap::GetPrefix(const string& ns) const
{
    if (ns == "") {
        return "";
    }
    const map<string,string>::const_iterator found = m_map.find(ns);
    if (found == m_map.end()) {
        return ":"; // invalid
    }
    const string& prefix = found->second;
    if (prefix == "") {
        return "";
    }
    return prefix + ":";
}
/**
 * Appends one namespace-declaration attribute per map entry to `attrs`.
 *
 * An entry with an empty prefix becomes `xmlns`, any other becomes
 * `xmlns:<prefix>`; the attribute value is the map key. Entries whose prefix
 * is "xml" are skipped.
 */
void
XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const
{
    for (map<string,string>::const_iterator entry = m_map.begin();
            entry != m_map.end(); ++entry) {
        const string& prefix = entry->second;
        if (prefix == "xml") {
            continue;
        }
        XMLAttribute decl;
        decl.name = "xmlns";
        if (prefix != "") {
            decl.name += ":";
            decl.name += prefix;
        }
        decl.value = entry->first;
        attrs->push_back(decl);
    }
}
// Creates an attribute whose members are all default-constructed.
XMLAttribute::XMLAttribute()
{
}
// Copy constructor: copies the namespace, name and value of `that`.
XMLAttribute::XMLAttribute(const XMLAttribute& that)
:ns(that.ns),
name(that.name),
value(that.value)
{
}
// Creates an attribute from its parts: `n` is the namespace, `na` the name
// and `v` the value.
XMLAttribute::XMLAttribute(string n, string na, string v)
:ns(n),
name(na),
value(v)
{
}
// Nothing to release explicitly; the destructor body is empty.
XMLAttribute::~XMLAttribute()
{
}
/**
 * Three-way comparison that orders attributes by namespace first and by name
 * second. The value does not take part in the comparison.
 *
 * Returns -1 if this attribute sorts before `that`, 1 if after, 0 if both
 * namespace and name are equal.
 */
int
XMLAttribute::Compare(const XMLAttribute& that) const
{
    if (ns < that.ns) {
        return -1;
    }
    if (that.ns < ns) {
        return 1;
    }
    if (name < that.name) {
        return -1;
    }
    if (that.name < name) {
        return 1;
    }
    return 0;
}
/**
 * Returns the value of the first attribute in `list` whose namespace and name
 * equal `ns` and `name`, or `def` when there is no such attribute.
 */
string
XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name,
                    const string& def)
{
    for (vector<XMLAttribute>::const_iterator it = list.begin(); it != list.end(); ++it) {
        if (it->ns == ns && it->name == name) {
            return it->value;
        }
    }
    return def;
}
// Per-parse state handed to the expat callbacks through the parser's user data.
struct xml_handler_data {
// Handler stack: the start-element callback pushes one entry per open
// element and the end-element callback pops it; the last entry receives
// the current events.
vector<XMLHandler*> stack;
// The expat parser, queried by the callbacks for the current line number.
XML_Parser parser;
// Not referenced by any code visible in this file.
vector<vector<XMLAttribute>*> attributes;
// Name used when building SourcePos values for handler callbacks.
string filename;
};
// Does no initialization of its own; NewElement() and NewText() assign the
// node's type, pretty mode, position and payload after construction.
XMLNode::XMLNode()
{
}
// The deletion of child nodes below is commented out, so destroying a node
// does not destroy the nodes held in m_children.
// NOTE(review): whether children are owned elsewhere cannot be told from this
// file -- confirm before re-enabling the deletion.
XMLNode::~XMLNode()
{
// for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>);
}
/**
 * Returns a newly allocated deep copy of this node.
 *
 * Text nodes copy their position, text and pretty mode; element nodes copy
 * position, namespace, name, attributes and pretty mode and then clone each
 * child in order. Any other node type yields NULL.
 */
XMLNode*
XMLNode::Clone() const
{
    if (m_type == TEXT) {
        return XMLNode::NewText(m_pos, m_text, m_pretty);
    }
    if (m_type != ELEMENT) {
        return NULL;
    }
    XMLNode* copy = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty);
    const size_t childCount = m_children.size();
    for (size_t idx = 0; idx < childCount; ++idx) {
        XMLNode* childCopy = m_children[idx]->Clone();
        copy->m_children.push_back(childCopy);
    }
    return copy;
}
/**
 * Allocates an ELEMENT node with the given source position, namespace, name,
 * attributes and pretty mode. The new node has no children.
 */
XMLNode*
XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name,
                    const vector<XMLAttribute>& attrs, int pretty)
{
    XMLNode* element = new XMLNode();
    element->m_type = ELEMENT;
    element->m_pos = pos;
    element->m_pretty = pretty;
    element->m_ns = ns;
    element->m_name = name;
    element->m_attrs = attrs;
    return element;
}
/**
 * Allocates a TEXT node holding `text`, with the given source position and
 * pretty mode.
 */
XMLNode*
XMLNode::NewText(const SourcePos& pos, const string& text, int pretty)
{
    XMLNode* textNode = new XMLNode();
    textNode->m_type = TEXT;
    textNode->m_pos = pos;
    textNode->m_pretty = pretty;
    textNode->m_text = text;
    return textNode;
}
/**
 * Sets the pretty mode of this node and of every node below it to `value`.
 */
void
XMLNode::SetPrettyRecursive(int value)
{
    m_pretty = value;
    const size_t childCount = m_children.size();
    size_t idx = 0;
    while (idx < childCount) {
        m_children[idx]->SetPrettyRecursive(value);
        ++idx;
    }
}
/**
 * Serializes this node's children, starting from an empty indent.
 */
string
XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const
{
    const string noIndent("");
    return contents_to_string(nspaces, noIndent);
}
/**
 * Serializes this node (and everything below it), starting from an empty
 * indent.
 */
string
XMLNode::ToString(const XMLNamespaceMap& nspaces) const
{
    const string noIndent("");
    return to_string(nspaces, noIndent);
}
/**
 * Serializes only this element's opening tag, starting from an empty indent
 * and using `pretty` as the formatting mode passed to the tag writer.
 */
string
XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const
{
    const string noIndent("");
    return open_tag_to_string(nspaces, noIndent, pretty);
}
/**
 * Concatenates the serialized form of every ELEMENT and TEXT child.
 *
 * When this node is in PRETTY mode, each element child is preceded by a
 * newline followed by `indent`; text children are appended with no prefix.
 * Children of any other type are skipped.
 */
string
XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const
{
    string out;
    const size_t childCount = m_children.size();
    for (size_t idx = 0; idx < childCount; ++idx) {
        const XMLNode* child = m_children[idx];
        const int kind = child->Type();
        if (kind != ELEMENT && kind != TEXT) {
            continue;
        }
        // Only elements start on a fresh, indented line.
        if (kind == ELEMENT && m_pretty == PRETTY) {
            out += '\n';
            out += indent;
        }
        out += child->to_string(nspaces, indent);
    }
    return out;
}
/**
 * Returns `str` with leading and trailing whitespace (as classified by
 * isspace) removed. A string that is empty or all whitespace yields "".
 *
 * Characters are converted to unsigned char before being classified because
 * passing a negative char value to isspace is undefined behavior, and the
 * scan is index-based so no pointer before the start of the buffer is ever
 * formed for an empty string.
 */
std::string
trim_string(const std::string& str)
{
    size_t first = 0;
    size_t last = str.length();
    // A NUL byte is not whitespace, so the leading scan stops there as well.
    while (first < last && isspace(static_cast<unsigned char>(str[first]))) {
        first++;
    }
    while (last > first && isspace(static_cast<unsigned char>(str[last-1]))) {
        last--;
    }
    return str.substr(first, last-first);
}
/**
 * Serializes the opening tag of this element; returns "" for non-elements.
 *
 * Attributes are written in sorted order. The first attribute always follows
 * the tag name after a single space. Later attributes are also separated by a
 * space when either this node or the `pretty` argument is EXACT; otherwise
 * each goes on its own line, indented by `indent` plus two extra levels.
 * The tag ends with ">" when the element has children and " />" otherwise.
 */
string
XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const
{
    if (m_type != ELEMENT) {
        return "";
    }

    // Sort a copy so the stored attribute order is left untouched.
    vector<XMLAttribute> sorted(m_attrs);
    sort(sorted.begin(), sorted.end());

    const bool singleLine = (m_pretty == EXACT) || (pretty == EXACT);

    string tag = "<";
    tag += nspaces.GetPrefix(m_ns);
    tag += m_name;

    const size_t attrCount = sorted.size();
    for (size_t idx = 0; idx < attrCount; ++idx) {
        const XMLAttribute& current = sorted[idx];
        if (idx != 0 && !singleLine) {
            tag += "\n";
            tag += indent;
            tag += MORE_INDENT;
            tag += MORE_INDENT;
        } else {
            tag += ' ';
        }
        tag += nspaces.GetPrefix(current.ns);
        tag += current.name;
        tag += "=\"";
        tag += xml_attr_escape(current.value);
        tag += '\"';
    }

    if (m_children.size() > 0) {
        tag += '>';
    } else {
        tag += " />";
    }
    return tag;
}
/**
 * Serializes this node.
 *
 * Text nodes are escaped; unless the node is EXACT the text is trimmed first.
 * Element nodes produce their opening tag and, when they have children, the
 * children (indented one more level) followed by the closing tag, which goes
 * on its own indented line in PRETTY mode. Other node types yield "".
 */
string
XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const
{
    if (m_type == TEXT) {
        if (m_pretty == EXACT) {
            return xml_text_escape(m_text);
        }
        return xml_text_escape(trim_string(m_text));
    }
    if (m_type != ELEMENT) {
        return "";
    }

    string out = open_tag_to_string(nspaces, indent, PRETTY);
    if (m_children.size() == 0) {
        // The opening tag was already written in its self-closing form.
        return out;
    }
    out += contents_to_string(nspaces, indent + MORE_INDENT);
    if (m_pretty == PRETTY) {
        out += '\n';
        out += indent;
    }
    out += "</";
    out += nspaces.GetPrefix(m_ns);
    out += m_name;
    out += '>';
    return out;
}
/**
 * Returns the raw text under this node: the node's own text for a TEXT node,
 * the concatenation of all children's collapsed text for an ELEMENT node, and
 * "" for any other node type. No escaping or trimming is applied.
 */
string
XMLNode::CollapseTextContents() const
{
    switch (m_type) {
        case TEXT:
            return m_text;
        case ELEMENT: {
            string collected;
            const size_t childCount = m_children.size();
            for (size_t idx = 0; idx < childCount; ++idx) {
                collected += m_children[idx]->CollapseTextContents();
            }
            return collected;
        }
        default:
            return "";
    }
}
/**
 * Returns, in document order, the direct children that are elements with the
 * given namespace and name. Descendants deeper than one level are not
 * searched.
 */
vector<XMLNode*>
XMLNode::GetElementsByName(const string& ns, const string& name) const
{
    vector<XMLNode*> matches;
    const size_t childCount = m_children.size();
    for (size_t idx = 0; idx < childCount; ++idx) {
        XMLNode* candidate = m_children[idx];
        if (candidate->m_type != ELEMENT) {
            continue;
        }
        if (candidate->m_ns == ns && candidate->m_name == name) {
            matches.push_back(candidate);
        }
    }
    return matches;
}
/**
 * Returns the `index`-th (zero-based) direct child that is an element with
 * the given namespace and name, or NULL when there are fewer than index+1
 * such children.
 */
XMLNode*
XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const
{
    const size_t N=m_children.size();
    for (size_t i=0; i<N; i++) {
        XMLNode* child = m_children[i];
        if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
            if (index == 0) {
                return child;
            }
            // Skip this match; `index` counts the matches still to pass over.
            index--;
        }
    }
    return NULL;
}
size_t
XMLNode::CountElementsByName(const string& ns, const string& name) const
{
size_t result = 0;
const size_t N=m_children.size();
for (size_t i=0; i<N; i++) {
XMLNode* child = m_children[i];
if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
result++;
}
}
return result;
}
// Returns the value of this node's attribute with the given namespace and
// name, or `def` when the node has no such attribute.
string
XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const
{
return XMLAttribute::Find(m_attrs, ns, name, def);
}
/**
 * Splits a qualified name at the first NS_SEPARATOR character.
 *
 * The part before the separator is stored in `*ns` and the part after it in
 * `*name`. When there is no separator, `*ns` becomes empty and `*name`
 * receives the whole input.
 */
static void
parse_namespace(const char* data, string* ns, string* name)
{
    const char* separator = strchr(data, NS_SEPARATOR);
    if (separator == NULL) {
        ns->assign("");
        name->assign(data);
        return;
    }
    ns->assign(data, separator-data);
    name->assign(separator+1);
}
static void
convert_attrs(const char** in, vector<XMLAttribute>* out)
{
while (*in) {
XMLAttribute attr;
parse_namespace(in[0], &attr.ns, &attr.name);
attr.value = in[1];
out->push_back(attr);
in += 2;
}
}
static bool
list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler)
{
const size_t N = stack.size();
for (size_t i=0; i<N; i++) {
if (stack[i] == handler) {
return true;
}
}
return false;
}
/**
 * expat start-element callback.
 *
 * Forwards the event to the handler on top of the stack, which may select a
 * different handler for the new element through its `next` out-parameter.
 * A handler that differs from the current one gets the element's position,
 * namespace, name and attributes recorded on it. The handler chosen for the
 * element (the current one when `next` comes back NULL) is pushed on the
 * stack.
 */
static void XMLCALL
start_element_handler(void *userData, const char *name, const char **attrs)
{
    xml_handler_data* state = static_cast<xml_handler_data*>(userData);
    XMLHandler* current = state->stack.back();
    SourcePos pos(state->filename, static_cast<int>(XML_GetCurrentLineNumber(state->parser)));

    string elementNs;
    string elementName;
    parse_namespace(name, &elementNs, &elementName);

    vector<XMLAttribute> elementAttrs;
    convert_attrs(attrs, &elementAttrs);

    XMLHandler* chosen = current;
    current->OnStartElement(pos, elementNs, elementName, elementAttrs, &chosen);
    if (chosen == NULL) {
        chosen = current;
    }
    if (chosen != current) {
        chosen->elementPos = pos;
        chosen->elementNamespace = elementNs;
        chosen->elementName = elementName;
        chosen->elementAttributes = elementAttrs;
    }
    state->stack.push_back(chosen);
}
/**
 * expat end-element callback.
 *
 * Pops the handler that was active for the closing element. If that handler
 * no longer appears anywhere on the stack, it receives OnDone and is deleted
 * unless only the bottom stack entry remains. The handler that is now on top
 * then receives OnEndElement for the closed element.
 */
static void XMLCALL
end_element_handler(void *userData, const char *name)
{
    xml_handler_data* state = static_cast<xml_handler_data*>(userData);
    XMLHandler* finished = state->stack.back();
    state->stack.pop_back();
    SourcePos pos(state->filename, static_cast<int>(XML_GetCurrentLineNumber(state->parser)));

    const bool stillInUse = list_contains(state->stack, finished);
    if (!stillInUse) {
        finished->OnDone(pos);
        if (state->stack.size() > 1) {
            // not top one
            delete finished;
        }
    }

    XMLHandler* parent = state->stack.back();
    string elementNs;
    string elementName;
    parse_namespace(name, &elementNs, &elementName);
    parent->OnEndElement(pos, elementNs, elementName);
}
/**
 * expat character-data callback: passes the `len` characters at `s` to the
 * handler on top of the stack via OnText.
 */
static void XMLCALL
text_handler(void *userData, const XML_Char *s, int len)
{
    xml_handler_data* state = static_cast<xml_handler_data*>(userData);
    XMLHandler* current = state->stack.back();
    SourcePos pos(state->filename, static_cast<int>(XML_GetCurrentLineNumber(state->parser)));
    const string text(s, len);
    current->OnText(pos, text);
}
/**
 * expat comment callback: passes the comment text to the handler on top of
 * the stack via OnComment.
 */
static void XMLCALL
comment_handler(void *userData, const char *comment)
{
    xml_handler_data* state = static_cast<xml_handler_data*>(userData);
    XMLHandler* current = state->stack.back();
    SourcePos pos(state->filename, static_cast<int>(XML_GetCurrentLineNumber(state->parser)));
    const string text(comment);
    current->OnComment(pos, text);
}
bool
XMLHandler::ParseFile(const string& filename, XMLHandler* handler)
{
char buf[16384];
int fd = open(filename.c_str(), O_RDONLY);
if (fd < 0) {
SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno));
return false;
}
XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
xml_handler_data state;
state.stack.push_back(handler);
state.parser = parser;
state.filename = filename;
XML_SetUserData(parser, &state);
XML_SetElementHandler(parser, start_element_handler, end_element_handler);
XML_SetCharacterDataHandler(parser, text_handler);
XML_SetCommentHandler(parser, comment_handler);
ssize_t len;
bool done;
do {
len = read(fd, buf, sizeof(buf));
done = len < (ssize_t)sizeof(buf);
if (len < 0) {
SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno));
close(fd);
return false;
}
if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
"Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
close(fd);
return false;
}
} while (!done);
XML_ParserFree(parser);
close(fd);
return true;
}
bool
XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler)
{
XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
xml_handler_data state;
state.stack.push_back(handler);
state.parser = parser;
state.filename = filename;
XML_SetUserData(parser, &state);
XML_SetElementHandler(parser, start_element_handler, end_element_handler);
XML_SetCharacterDataHandler(parser, text_handler);
XML_SetCommentHandler(parser, comment_handler);
if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) {
SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
"Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
return false;
}
XML_ParserFree(parser);
return true;
}
// Nothing to set up; the constructor body is empty.
XMLHandler::XMLHandler()
{
}
// Nothing to release explicitly; the destructor body is empty.
XMLHandler::~XMLHandler()
{
}
// Base implementation: ignores the element, leaves `*next` untouched and
// returns 0.
int
XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
const vector<XMLAttribute>& attrs, XMLHandler** next)
{
return 0;
}
// Base implementation: ignores the end of the element and returns 0.
int
XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
{
return 0;
}
// Base implementation: ignores the text and returns 0.
int
XMLHandler::OnText(const SourcePos& pos, const string& text)
{
return 0;
}
// Base implementation: ignores the comment and returns 0.
int
XMLHandler::OnComment(const SourcePos& pos, const string& text)
{
return 0;
}
// Base implementation: does nothing and returns 0.
int
XMLHandler::OnDone(const SourcePos& pos)
{
return 0;
}
// Stores the namespace, the name and the handler that OnStartElement hands
// elements over to.
// NOTE(review): m_ns and m_name are stored but not read anywhere in the code
// visible here -- confirm whether the element is meant to be checked.
TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next)
:m_ns(ns),
m_name(name),
m_next(next)
{
}
// Hands every started element to the handler given at construction by
// writing it to `*next`, without inspecting the element's namespace, name or
// attributes. Always returns 0.
int
TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
const vector<XMLAttribute>& attrs, XMLHandler** next)
{
*next = m_next;
return 0;
}
// Ignores the end of the element and returns 0.
int
TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
{
return 0;
}
// Ignores the text and returns 0.
int
TopElementHandler::OnText(const SourcePos& pos, const string& text)
{
return 0;
}
// Does nothing and returns 0.
int
TopElementHandler::OnDone(const SourcePos& pos)
{
return 0;
}
/**
 * Creates a handler that builds an XMLNode tree.
 *
 * `root` may be NULL, in which case the first element seen becomes the root.
 * A non-NULL root is placed on the open-node stack so that parsed elements
 * are added beneath it. `pretty` is the mode given to the root element when
 * the handler has to create it.
 */
NodeHandler::NodeHandler(XMLNode* root, int pretty)
    :m_root(root),
     m_pretty(pretty)
{
    if (root == NULL) {
        return;
    }
    m_nodes.push_back(root);
}
// The destructor body is empty: the nodes reachable from m_root are not
// deleted here.
NodeHandler::~NodeHandler()
{
}
int
NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
const vector<XMLAttribute>& attrs, XMLHandler** next)
{
int pretty;
if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") {
pretty = XMLNode::EXACT;
} else {
if (m_root == NULL) {
pretty = m_pretty;
} else {
pretty = m_nodes[m_nodes.size()-1]->Pretty();
}
}
XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty);
if (m_root == NULL) {
m_root = n;
} else {
m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
}
m_nodes.push_back(n);
return 0;
}
// Closes the innermost open element by removing it from the open-node stack.
// Always returns 0.
int
NodeHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
{
m_nodes.pop_back();
return 0;
}
int
NodeHandler::OnText(const SourcePos& pos, const string& text)
{
if (m_root == NULL) {
return 1;
}
XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty());
m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
return 0;
}
// Comments are not added to the node tree; always returns 0.
int
NodeHandler::OnComment(const SourcePos& pos, const string& text)
{
return 0;
}
// Does nothing and returns 0.
int
NodeHandler::OnDone(const SourcePos& pos)
{
return 0;
}
/**
 * Parses the XML file `filename` into a node tree and returns its root.
 *
 * `pretty` is the mode given to the root element. On failure a message is
 * written to stderr and NULL is returned.
 */
XMLNode*
NodeHandler::ParseFile(const string& filename, int pretty)
{
    NodeHandler builder(NULL, pretty);
    const bool parsed = XMLHandler::ParseFile(filename, &builder);
    if (parsed) {
        return builder.Root();
    }
    fprintf(stderr, "error parsing file: %s\n", filename.c_str());
    return NULL;
}
/**
 * Parses the XML document in `text` into a node tree and returns its root.
 *
 * `filename` labels positions and messages; `pretty` is the mode given to
 * the root element. On failure a message is written to stderr and NULL is
 * returned.
 */
XMLNode*
NodeHandler::ParseString(const string& filename, const string& text, int pretty)
{
    NodeHandler builder(NULL, pretty);
    const bool parsed = XMLHandler::ParseString(filename, text, &builder);
    if (parsed) {
        return builder.Root();
    }
    fprintf(stderr, "error parsing file: %s\n", filename.c_str());
    return NULL;
}