##
## deja-vu batman, didn't I write this before?
## This parser is designed to parse an SGML document; its default action
## is just to pass the data through. Based on TestSGMLParser from sgmllib.py.
## Whether unknown elements are passed through as well is controlled by a flag.
##
from sgmllib import SGMLParser
class PassSGMLParser(SGMLParser):
    """SGML parser that copies the parsed document through to a file object.

    Character data is buffered in ``self.data`` and written to ``fp`` whenever
    markup is emitted or the parser is closed.  Comments are dropped.  Entity
    and character references found in text are written back out unexpanded.
    Unknown start/end tags are written through only when ``pass_unknown`` is
    true; otherwise they are discarded.
    """

    def __init__(self, fp, pass_unknown=0, verbose=0):
        """Create a parser that writes its output to *fp*.

        fp           -- object with a ``write(string)`` method; receives output
        pass_unknown -- if true, unknown start/end tags are written through
        verbose      -- forwarded unchanged to ``SGMLParser.__init__``
        """
        self.pass_unknown = pass_unknown
        self.data = ""
        self.fp = fp
        SGMLParser.__init__(self, verbose)

    def handle_data(self, data):
        """Buffer character data until the next flush()."""
        self.data = self.data + data

    def flush(self):
        """Write any buffered character data to the output and clear it."""
        pending = self.data
        if pending:
            # Clear the buffer before writing so a failing write does not
            # leave the same text queued for a second attempt.
            self.data = ""
            self.write(pending)

    def write(self, data):
        """Write *data* to the output file; return whatever fp.write returns."""
        return self.fp.write(data)

    def write_starttag(self, tag, attrs):
        """Flush pending text, then write a start tag with its attributes.

        attrs is a sequence of (name, value) string pairs.  Every value is
        written between double quotes.
        """
        self.flush()
        pieces = ["<" + tag]
        for name, value in attrs:
            # A literal double quote inside the value (from a single-quoted
            # attribute, or from an already-decoded &quot;) would terminate
            # the attribute early, so it is written as &quot;.  "&" is left
            # alone on purpose so unresolved entity references in the value
            # still pass through unchanged.
            pieces.append(' %s="%s"' % (name, value.replace('"', "&quot;")))
        pieces.append(">")
        self.write("".join(pieces))

    def write_endtag(self, tag):
        """Flush pending text, then write the end tag for *tag*."""
        self.flush()
        self.write("</%s>" % tag)

    def handle_comment(self, data):
        """Discard comments; they are deliberately not passed through."""
        pass

    def unknown_starttag(self, tag, attrs):
        """Write an unknown start tag through if pass_unknown is set."""
        if self.pass_unknown:
            self.write_starttag(tag, attrs)

    def unknown_endtag(self, tag):
        """Write an unknown end tag through if pass_unknown is set."""
        if self.pass_unknown:
            self.write_endtag(tag)

    def handle_entityref(self, ref):
        """Write the entity reference ``&ref;`` back out unexpanded."""
        self.flush()
        self.write("&%s;" % ref)

    def handle_charref(self, ref):
        """Write the character reference ``&#ref;`` back out unexpanded."""
        self.flush()
        self.write("&#%s;" % ref)

    def close(self):
        """Finish parsing, then flush any text still buffered."""
        # The base close() may still deliver trailing data through
        # handle_data(), so the flush has to come afterwards.
        SGMLParser.close(self)
        self.flush()