# Plain text  |  308 lines  |  8.79 KB

#!/usr/bin/python

#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Usage:
  metadata_validate.py <filename.xml>
  - validates that the metadata properties defined in filename.xml are
    semantically correct.
  - does not do any XSD validation, use xmllint for that (in metadata-validate)

Module:
  A set of helpful functions for dealing with BeautifulSoup element trees.
  Especially the find_* and fully_qualified_name functions.

Dependencies:
  BeautifulSoup - an HTML/XML parser available to download from
                  http://www.crummy.com/software/BeautifulSoup/
"""

from bs4 import BeautifulSoup
from bs4 import Tag
import sys


#####################
#####################

def fully_qualified_name(entry):
  """
  Build the dotted, fully qualified name of an entry.

  The name is made of the 'name' attribute of every enclosing <namespace>
  and <section> ancestor, followed by the entry's own 'name' attribute,
  all joined with '.'.

  Args:
    entry: a BeautifulSoup Tag corresponding to an <entry ...> XML node

  Returns:
    A string with the full name, e.g. "android.lens.info.availableApertureSizes"
  """
  scoping_tags = ['namespace', 'section']

  # Ancestor names are gathered in the order entry.parents yields them and
  # then flipped, so the final path reads from the outermost scope inwards.
  path = []
  for ancestor in entry.parents:
    if ancestor.name in scoping_tags:
      path.append(ancestor['name'])
  path.reverse()

  path.append(entry['name'])
  return ".".join(path)

def find_parent_by_name(element, names):
  """
  Look for an ancestor of an element whose tag name is one of those in names.

  Args:
    element: A BeautifulSoup Tag corresponding to an XML node
    names: A collection of tag-name strings to match against

  Returns:
    The tag name (a string) of the first ancestor, in element.parents order,
    whose name is in names, or None if no ancestor matches. Note that the
    name is returned, not the Tag object itself.

    For example, assuming the following XML structure:
      <static>
        <anything>
          <entry name="Hello" />   # this is in variable 'Hello'
        </anything>
      </static>

      kind = find_parent_by_name(Hello, ['static'])
      # kind is now the string 'static'
  """
  for ancestor in element.parents:
    if ancestor.name in names:
      return ancestor.name

  return None

def find_all_child_tags(element, tag):
    """
    Collect the direct children of an element that are Tag instances named
    tag. Children that are not a Tag (such as a NavigableString) are skipped,
    as are Tags with a different name.

    Args:
      element: A BeautifulSoup Tag corresponding to an XML node
      tag: A string representing the name of the tag

    Returns:
      A list of Tag instances, in the order element.children yields them

      For example, given the following XML structure:
        <enum>                    # This is the variable el
          Hello world             # NavigableString
          <value>Apple</value>    # this is the variable apple (Tag)
          <value>Orange</value>   # this is the variable orange (Tag)
          Hello world again       # NavigableString
        </enum>

        lst = find_all_child_tags(el, 'value')
        # lst is [apple, orange]

    """
    tags_found = []
    for child in element.children:
        # Text nodes and other non-Tag children are never a match.
        if not isinstance(child, Tag):
            continue
        if child.name == tag:
            tags_found.append(child)
    return tags_found

def find_child_tag(element, tag):
    """
    Return the first child Tag of an element that has the given name.

    Args:
      element: a BeautifulSoup Tag
      tag: A String representing the name of the tag

    Returns:
      The first Tag reported by find_all_child_tags(element, tag), or None
      if there were no matches.

      For example, given the following XML structure:
        <enum>                    # This is the variable el
          Hello world             # NavigableString
          <value>Apple</value>    # this is the variable apple (Tag)
          <value>Orange</value>   # this is the variable orange (Tag)
          Hello world again       # NavigableString
        </enum>

        res = find_child_tag(el, 'value')
        # res is apple
    """
    candidates = find_all_child_tags(element, tag)
    return candidates[0] if candidates else None

def find_kind(element):
  """
  Report which kind section encloses an element.

  The kinds searched for are the tag names 'dynamic', 'static' and
  'controls'.

  Args:
    element: a BeautifulSoup Tag

  Returns:
    The result of find_parent_by_name for those tag names, or None if the
    element has no such ancestor.

  Remarks:
    Only elements nested somewhere below a <dynamic>, <static> or
    <controls> tag produce a non-None result.
  """
  return find_parent_by_name(element, ['dynamic', 'static', 'controls'])

def validate_error(msg):
  """
  Print a validation error to stderr.

  The output is the text "Validation error: " followed by msg and a
  trailing newline.

  Args:
    msg: a string you want to be printed
  """
  # sys.stderr.write is used instead of the Python 2 only 'print >> stream'
  # statement so the function behaves the same on Python 2 and Python 3.
  sys.stderr.write("Validation error: " + msg + "\n")


def validate_clones(soup):
  """
  Validate that all <clone> elements point to an existing <entry> element.

  A clone is satisfied by an <entry> whose kind (as reported by find_kind)
  equals the clone's 'kind' attribute and whose fully qualified name equals
  the clone's 'entry' attribute. Every clone without such an entry is
  reported through validate_error.

  Args:
    soup - an instance of BeautifulSoup

  Returns:
    True if the validation succeeds, False otherwise
  """
  success = True

  for clone in soup.find_all("clone"):
    # Both attributes are read with [] and are therefore required on
    # every <clone>.
    clone_entry = clone['entry']
    clone_kind = clone['kind']

    def is_cloned_entry(tag):
      # Cheapest check first: only <entry> tags need their kind and
      # fully qualified name computed.
      return (tag.name == 'entry'
              and find_kind(tag) == clone_kind
              and fully_qualified_name(tag) == clone_entry)

    # The predicate is consumed right away, within the same iteration, so
    # closing over the loop variables is safe here.
    if soup.find(is_cloned_entry) is None:
      validate_error(("Did not find corresponding clone entry '%s' "
                      "with kind '%s'") % (clone_entry, clone_kind))
      success = False

  return success

# All <entry> elements with container=$foo have a <$foo> child
# If enum="true", an <enum> tag is present (and an <enum> tag requires enum="true")
# In <enum> for all <value id="$x">, $x is numeric
def validate_entries(soup):
  """
  Validate all <entry> elements with the following rules:
    * If there is a container="$foo" attribute, there is a <$foo> element
      inside the entry
    * If there is an enum="true" attribute, there is an <enum> element
      inside the entry
    * If there is no enum="true" attribute, there is no <enum> element
      inside the entry
    * In the <enum> element, every <value id="$x"> has a numeric $x

  Every violation is reported through validate_error; checking continues
  after a failure so that all problems are listed in one run.

  Args:
    soup - an instance of BeautifulSoup

  Returns:
    True if the validation succeeds, False otherwise
  """
  success = True
  for entry in soup.find_all("entry"):
    entry_container = entry.attrs.get('container')

    if entry_container is not None:
      container_tag = entry.find(entry_container)

      if container_tag is None:
        success = False
        validate_error(("Entry '%s' in kind '%s' has container '%s' but "
                        "missing child element <%s>")
                       % (fully_qualified_name(entry), find_kind(entry),
                          entry_container, entry_container))

    if entry.attrs.get('enum') == 'true':
      if entry.enum is None:
        validate_error("Entry '%s' in kind '%s' is missing enum"
                       % (fully_qualified_name(entry), find_kind(entry)))
        success = False
      else:
        for value in entry.enum.find_all('value'):
          value_id = value.attrs.get('id')

          # The id attribute is optional; only present ids are checked.
          if value_id is None:
            continue

          try:
            # Base 0 lets int() infer the base from the literal's prefix
            # (e.g. 0x for hexadecimal). Only parseability matters here.
            int(value_id, 0)
          except ValueError:
            validate_error(("Entry '%s' has id '%s', which is not"
                            " numeric.")
                           % (fully_qualified_name(entry), value_id))
            success = False
    elif entry.enum:
      validate_error("Entry '%s' kind '%s' has enum el, but no enum attr"
                     % (fully_qualified_name(entry), find_kind(entry)))
      success = False

  return success

def validate_xml(file_name):
  """
  Validate all XML nodes according to the rules in validate_clones and
  validate_entries.

  Args:
    file_name - a string path to an XML file we wish to validate

  Returns:
    a BeautifulSoup instance if validation succeeds, None otherwise
  """

  # 'with open' closes the handle as soon as the contents are read, and
  # unlike the Python 2 only file() builtin it also exists on Python 3.
  # The raw bytes are handed to BeautifulSoup, leaving decoding to the parser.
  with open(file_name, 'rb') as xml_file:
    xml = xml_file.read()
  soup = BeautifulSoup(xml, features='xml')

  succ = validate_clones(soup)
  # validate_entries is the left operand so it always runs and reports its
  # errors, even when clone validation has already failed.
  succ = validate_entries(soup) and succ

  return soup if succ else None

#####################
#####################

if __name__ == "__main__":
  # Script entry point: validate the XML file named by the first argument.
  # Output goes through sys.stdout/sys.stderr.write instead of Python 2
  # print statements, which are a SyntaxError on Python 3 and would prevent
  # this module from even being imported there.
  if len(sys.argv) <= 1:
    sys.stderr.write("Usage: %s <filename.xml>\n" % (sys.argv[0]))
    # NOTE(review): exits with status 0 although nothing was validated;
    # left unchanged in case existing callers depend on it.
    sys.exit(0)

  file_name = sys.argv[1]
  succ = validate_xml(file_name) is not None

  if succ:
    sys.stdout.write("%s: SUCCESS! Document validated\n" % (file_name))
    sys.exit(0)
  else:
    sys.stderr.write("%s: ERRORS: Document failed to validate\n" % (file_name))
    sys.exit(1)