普通文本  |  66行  |  1.53 KB

##
## deja-vu batman, didn't I write this before?
## This parser is designed to parse an SGML document, and the default action 
## is just to pass the data through.  Based on TestSGMLParser from sgmllib.py
## Hmm, actually, make it a flag whether to handle unknown elements
##

from sgmllib import SGMLParser

class PassSGMLParser(SGMLParser):
  def __init__(self, fp, pass_unknown=0, verbose=0):
    self.pass_unknown = pass_unknown
    self.data = ""
    self.fp = fp
    SGMLParser.__init__(self, verbose)

  def handle_data(self, data):
    self.data = self.data + data

  def flush(self):
    data = self.data
    if data:
      self.data = ""
      self.write(data)

  def write (self, data):
    return self.fp.write(data)

  def write_starttag (self, tag, attrs):
    self.flush()
    if not attrs:
      self.write ("<%s>" % tag)
    else:
      self.write ("<" + tag)
      for name, value in attrs:
        self.write (" " + name + '=' + '"' + value + '"')
      self.write (">")

  def write_endtag (self, tag):
    self.flush()
    self.write ("</%s>" % tag)

  def handle_comment(self, data):
    # don't pass comments
    pass

  def unknown_starttag(self, tag, attrs):
    if self.pass_unknown:
      self.write_starttag (tag, attrs)

  def unknown_endtag(self, tag):
    if self.pass_unknown:
      self.write_endtag(tag)

  def handle_entityref(self, ref):
    self.flush()
    self.write ("&%s;" % ref)

  def handle_charref(self, ref):
    self.flush()
    self.write ("&#%s;" % ref)

  def close(self):
    SGMLParser.close(self)
    self.flush()