#!/usr/bin/python
#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Usage:
metadata_validate.py <filename.xml>
- validates that the metadata properties defined in filename.xml are
semantically correct.
- does not do any XSD validation, use xmllint for that (in metadata-validate)
Module:
A set of helpful functions for dealing with BeautifulSoup element trees.
Especially the find_* and fully_qualified_name functions.
Dependencies:
BeautifulSoup - an HTML/XML parser available to download from
http://www.crummy.com/software/BeautifulSoup/
"""
from bs4 import BeautifulSoup
from bs4 import Tag
import sys
#####################
#####################
def fully_qualified_name(entry):
    """
    Build the dotted, fully qualified name of an entry.

    The name is made of the 'name' attribute of every <namespace> and
    <section> ancestor, outermost first, followed by the entry's own
    'name' attribute.

    Args:
      entry: a BeautifulSoup Tag corresponding to an <entry ...> XML node

    Returns:
      A string with the full name, e.g.
      "android.lens.info.availableApertureSizes"
    """
    scope_tags = ('namespace', 'section')

    # Ancestors are visited innermost first, so collect them in that order
    # and flip the list once when assembling the final name.
    ancestor_names = []
    for ancestor in entry.parents:
        if ancestor.name in scope_tags:
            ancestor_names.append(ancestor['name'])

    leaf_name = entry['name']
    return ".".join(ancestor_names[::-1] + [leaf_name])
def find_parent_by_name(element, names):
    """
    Find the first ancestor of an element whose tag name is one of those
    in names, and return that tag name.

    Args:
      element: A BeautifulSoup Tag corresponding to an XML node
      names: A collection of tag-name strings to look for

    Returns:
      The tag name (a string) of the first matching ancestor encountered
      while walking element.parents, or None if no ancestor matches.
      Note that the name is returned, not the ancestor element itself.

    For example, assuming the following XML structure:
      <static>
        <anything>
          <entry name="Hello" />   # this is in variable 'Hello'
        </anything>
      </static>

      el = find_parent_by_name(Hello, ['static'])
      # el is now the string 'static'
    """
    for ancestor in element.parents:
        if ancestor.name in names:
            return ancestor.name
    return None
def find_all_child_tags(element, tag):
    """
    Collect the direct children of element that are Tag instances (as
    opposed to NavigableString) and whose name equals tag.

    Args:
      element: A BeautifulSoup Tag corresponding to an XML node
      tag: A string representing the name of the tag

    Returns:
      A list of Tag instances, in the order they appear in element.children

    For example, given the following XML structure:
      <enum>                    # This is the variable el
        Hello world             # NavigableString
        <value>Apple</value>    # this is the variable apple (Tag)
        <value>Orange</value>   # this is the variable orange (Tag)
        Hello world again       # NavigableString
      </enum>

      lst = find_all_child_tags(el, 'value')
      # lst is [apple, orange]
    """
    found = []
    for child in element.children:
        # Skip text nodes and anything else that is not a real Tag.
        if not isinstance(child, Tag):
            continue
        if child.name == tag:
            found.append(child)
    return found
def find_child_tag(element, tag):
    """
    Return the first direct child of element that is a Tag named tag.

    Args:
      element: a BeautifulSoup Tag
      tag: A String representing the name of the tag

    Returns:
      An instance of a Tag, or None if there were no matches.

    For example, given the following XML structure:
      <enum>                    # This is the variable el
        Hello world             # NavigableString
        <value>Apple</value>    # this is the variable apple (Tag)
        <value>Orange</value>   # this is the variable orange (Tag)
        Hello world again       # NavigableString
      </enum>

      res = find_child_tag(el, 'value')
      # res is apple
    """
    candidates = find_all_child_tags(element, tag)
    return candidates[0] if candidates else None
def find_kind(element):
    """
    Find the kind that an element belongs to.

    The kind is the tag name of the first ancestor named 'dynamic',
    'static' or 'controls', as located by find_parent_by_name.

    Args:
      element: a BeautifulSoup Tag

    Returns:
      The kind as a string ('dynamic', 'static' or 'controls'), or None if
      the element has no such ancestor.

    Remarks:
      This function only makes sense to be called for an Entry, Clone, or
      InnerNamespace XML types; other nodes are not expected to live under
      a kind element and yield None.
    """
    return find_parent_by_name(element, ['dynamic', 'static', 'controls'])
def validate_error(msg):
    """
    Print a validation error to stderr.

    The message is prefixed with "Validation error: " and terminated with a
    newline.

    Args:
      msg: a string you want to be printed
    """
    # Write to the stream directly rather than using the Python 2 only
    # 'print >> stream' statement, so the same code runs on Python 2 and 3.
    sys.stderr.write("Validation error: " + msg + "\n")
def validate_clones(soup):
    """
    Validate that all <clone> elements point to an existing <entry> element.

    A clone is satisfied by an <entry> whose kind (see find_kind) equals the
    clone's 'kind' attribute and whose fully qualified name (see
    fully_qualified_name) equals the clone's 'entry' attribute. Every clone
    without such an entry is reported through validate_error.

    The 'entry' and 'kind' attributes of each <clone> are read
    unconditionally, so both are expected to be present.

    Args:
      soup - an instance of BeautifulSoup

    Returns:
      True if the validation succeeds, False otherwise
    """
    success = True

    for clone in soup.find_all("clone"):
        clone_entry = clone['entry']
        clone_kind = clone['kind']

        # Bind the current clone's values as defaults so the predicate does
        # not depend on the loop variables after this iteration.
        def is_cloned_entry(tag, kind=clone_kind, name=clone_entry):
            return (tag.name == 'entry'
                    and find_kind(tag) == kind
                    and fully_qualified_name(tag) == name)

        if soup.find(is_cloned_entry) is None:
            validate_error(("Did not find corresponding clone entry '%s' "
                            "with kind '%s'") % (clone_entry, clone_kind))
            success = False

    return success
# All <entry> elements with container=$foo have a <$foo> child
# If enum="true", an <enum> tag is present (and not present otherwise)
# In <enum> for all <value id="$x">, $x is numeric
def validate_entries(soup):
    """
    Validate all <entry> elements with the following rules:
      * If there is a container="$foo" attribute, there is a <$foo> element
        inside the entry
      * If there is an enum="true" attribute, there is an <enum> element
        inside the entry
      * In that <enum> element, all <value id="$x"> have a numeric $x
        (<value> elements without an id attribute are not checked)
      * If there is no enum="true" attribute, there is no <enum> element
        inside the entry

    Every violation is reported through validate_error; checking continues
    after a violation so that all problems are reported in one run.

    Args:
      soup - an instance of BeautifulSoup

    Returns:
      True if the validation succeeds, False otherwise
    """
    success = True

    for entry in soup.find_all("entry"):
        entry_container = entry.attrs.get('container')
        if entry_container is not None:
            if entry.find(entry_container) is None:
                success = False
                validate_error(("Entry '%s' in kind '%s' has type '%s' but "
                                "missing child element <%s>")
                               % (fully_qualified_name(entry),
                                  find_kind(entry),
                                  entry_container, entry_container))

        if entry.attrs.get('enum') == 'true':
            if entry.enum is None:
                validate_error("Entry '%s' in kind '%s' is missing enum"
                               % (fully_qualified_name(entry),
                                  find_kind(entry)))
                success = False
            else:
                for value in entry.enum.find_all('value'):
                    value_id = value.attrs.get('id')
                    if value_id is None:
                        continue
                    try:
                        # Base 0 lets int() infer the base from the
                        # literal's prefix (e.g. 0x for hexadecimal); only
                        # whether it parses matters, not the value.
                        int(value_id, 0)
                    except ValueError:
                        validate_error(("Entry '%s' has id '%s', which is not"
                                        " numeric.")
                                       % (fully_qualified_name(entry),
                                          value_id))
                        success = False
        elif entry.enum:
            # An <enum> element without enum="true" on the entry.
            validate_error("Entry '%s' kind '%s' has enum el, but no enum attr"
                           % (fully_qualified_name(entry), find_kind(entry)))
            success = False

    return success
def validate_xml(file_name):
    """
    Validate all XML nodes according to the rules in validate_clones and
    validate_entries.

    Args:
      file_name - a string path to an XML file we wish to validate

    Returns:
      a BeautifulSoup instance if validation succeeds, None otherwise
    """
    # Read raw bytes and close the file promptly; the parser is handed the
    # undecoded document so it can work out the encoding itself.
    with open(file_name, 'rb') as xml_file:
        xml = xml_file.read()
    soup = BeautifulSoup(xml, features='xml')

    # Run both validators unconditionally so that the errors from each of
    # them are reported, even when the first one already failed.
    clones_ok = validate_clones(soup)
    entries_ok = validate_entries(soup)

    if clones_ok and entries_ok:
        return soup
    return None
#####################
#####################
# Script entry point: validate the XML file named by the first command-line
# argument. Exits with status 0 when the document validates and 1 when it
# does not. Output goes through the stream objects directly (instead of the
# Python 2 only print statement) so the module also parses on Python 3.
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        sys.stderr.write("Usage: %s <filename.xml>\n" % (sys.argv[0],))
        # NOTE(review): a missing argument exits with status 0; kept as-is
        # for compatibility, but a non-zero status may be more appropriate.
        sys.exit(0)

    file_name = sys.argv[1]
    if validate_xml(file_name) is not None:
        sys.stdout.write("%s: SUCCESS! Document validated\n" % (file_name,))
        sys.exit(0)
    else:
        sys.stderr.write("%s: ERRORS: Document failed to validate\n"
                         % (file_name,))
        sys.exit(1)