普通文本  |  91行  |  2.28 KB

#!/usr/bin/env python
#
# This parser parses the output of Phil Harvey's exiftool (version 9.02)
# and converts it to XML. It reads exiftool's output from stdin and
# writes the XML to stdout.
#
# In order to get the raw information from exiftool, we need to enable its
# verbose flag (-v2).
#
# Usage:
#      exiftool -v2 img.jpg | ./parser.py >> output.xml
#
#

import os
import re
import sys
from xml.sax.saxutils import escape

# Element text emitted when a tag carries no usable scalar value: a pointer
# to a sub-directory, or a value that exiftool truncated with "[snip]".
NO_VALUE = "NO_VALUE"

# "ifd" attribute used for a tag that shows up while no directory is open.
UNKNOWN_IFD = "UNKNOWN"

# Finds the following two kinds of entries in the verbose output:
#
# 1. tag (two lines; the match ends right after the hexadecimal tag ID):
#
# | | | x) name = value
# | | |     - Tag 0x1234
#
# 2. IFD indicator:
#
# | | | + [xxx directory with xx entries]
#
# Groups: (1) whole tag entry, (2) its "| " prefix,
#         (3) whole IFD indicator, (4) its "| " prefix.
ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n"
    r".*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# The tag ID is the last thing in a tag entry.  Anchoring at the end keeps a
# hexadecimal-looking value on the first line from being taken for the ID.
_TAG_ID_RE = re.compile(r"- Tag (0x[0-9a-f]{4})\Z")
_TAG_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
_RAW_VALUE_RE = re.compile(r"\(.*\)\n")
_PLAIN_VALUE_RE = re.compile(r"=.*\n")
_SUBDIR_MARKER = "(SubDirectory) -->"

# Extra entity needed when escaped text is placed inside a "..." attribute.
_ATTR_ENTITIES = {'"': "&quot;"}


def _parse_tag(entry):
    """Return (tag_id, name, value) for one two-line tag entry."""
    tag_id = _TAG_ID_RE.search(entry).group(1)

    # "x)  name = ..." -> the second whitespace-separated token is the name.
    name = _TAG_NAME_RE.search(entry).group(0).split()[1]

    if _SUBDIR_MARKER in entry:
        return tag_id, name, NO_VALUE

    # Prefer the raw value exiftool prints in parentheses at the end of the
    # line.  'Model' is excluded: its value is always taken from after "=".
    raw = _RAW_VALUE_RE.search(entry)
    if raw and name != "Model":
        return tag_id, name, raw.group(0)[1:-2]   # strip "(" and ")\n"

    value = _PLAIN_VALUE_RE.search(entry).group(0)[2:-1]  # strip "= ", "\n"
    if "[snip]" in value:
        value = NO_VALUE
    return tag_id, name, value


def parse_tags(text):
    """Yield (ifd, tag_id, name, value) for every tag in exiftool -v2 output.

    `text` is the complete verbose output.  `ifd` is the name of the
    innermost directory enclosing the tag, or UNKNOWN_IFD when the tag is
    not inside any directory announced by an IFD indicator line.
    """
    # Stack of open directory names; index i holds the directory at depth i+1.
    # The nesting depth comes from the number of "| " prefixes on each line,
    # and the stack is truncated to that depth.  Unlike a separately kept
    # depth counter, this cannot drift from the real stack size when the
    # input skips a level.
    ifd_stack = []

    for entry, tag_prefix, indicator, ifd_prefix in ENTRY_RE.findall(text):
        if indicator:
            depth = len(ifd_prefix) // 2 + 1
            # Skip the prefix and the "+ [" marker; the first word is the name.
            ifd_name = indicator[len(ifd_prefix) + 3:].split()[0]
            # Close this depth and anything deeper, then open the new one.
            del ifd_stack[depth - 1:]
            ifd_stack.append(ifd_name)
            continue

        depth = len(tag_prefix) // 2
        del ifd_stack[depth:]   # leave directories deeper than this tag
        ifd = ifd_stack[-1] if ifd_stack else UNKNOWN_IFD
        tag_id, name, value = _parse_tag(entry)
        yield ifd, tag_id, name, value


def to_xml(text):
    """Return the XML document (as a string) for exiftool -v2 output `text`.

    Attribute values and element text are XML-escaped so that tag values
    containing '&', '<', '>' or '"' still give a well-formed document.
    """
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]
    for ifd, tag_id, name, value in parse_tags(text):
        lines.append('    <tag ifd="%s" id="%s" name="%s">%s</tag>' % (
            escape(ifd, _ATTR_ENTITIES),
            tag_id,
            escape(name, _ATTR_ENTITIES),
            escape(value),
        ))
    lines.append("</exif>")
    return "\n".join(lines) + "\n"


def main():
    """Read exiftool -v2 output from stdin and write the XML to stdout."""
    sys.stdout.write(to_xml(sys.stdin.read()))


if __name__ == "__main__":
    main()