#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to XML. It reads exiftool's output from stdin and writes
# the XML to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#     exiftool -v2 img.jpg | ./parser.py >> output.xml
#
# The code avoids version-specific syntax so that it can run on both
# Python 2 and Python 3.

import os
import sys
import re
from xml.sax.saxutils import escape

# Matches the following two kinds of string:
#
# 1. tag (groups 1 and 2: whole entry, "| " prefix):
#
#    | | | x) name = value
#    | | |     - Tag 0x1234
#
# 2. IFD indicator (groups 3 and 4: whole line, "| " prefix):
#
#    | | | + [xxx directory with xx entries]
#
ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# "x) name = " or "x) name (SubDirectory) -->" at the start of a tag entry.
_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
_SUBDIR_RE = re.compile(r"\(SubDirectory\) -->")
# Raw value printed in parentheses at the end of the first line of an entry.
_PAREN_VALUE_RE = re.compile(r"\(.*\)\n")
# Everything after the "=" on the first line of an entry.
_PLAIN_VALUE_RE = re.compile(r"=.*\n")

NO_VALUE = "NO_VALUE"


def _attr(text):
    """Escape *text* for use inside a double-quoted XML attribute."""
    return escape(text, {'"': "&quot;"})


def _tag_value(name, entry):
    """Return the raw value of a tag entry, or NO_VALUE if there is none.

    Sub-directory pointers and values that exiftool truncated with
    "[snip]" have no usable value.  Otherwise the text in the trailing
    parentheses is preferred over the text after "=".  The 'Model' tag is
    the exception: its value is always taken from the text after "=".
    """
    if _SUBDIR_RE.search(entry):
        return NO_VALUE
    paren = _PAREN_VALUE_RE.search(entry)
    if name != 'Model' and paren:
        # Strip the opening "(" and the closing ")\n".
        value = paren.group(0)[1:-2]
    else:
        # Strip the leading "= " and the trailing "\n".
        value = _PLAIN_VALUE_RE.search(entry).group(0)[2:-1]
    if "[snip]" in value:
        return NO_VALUE
    return value


def convert(text):
    """Convert exiftool -v2 output *text* to an XML document string.

    The result starts with the XML declaration, wraps one <tag> element
    per recognised tag in an <exif> root element, and ends with a newline.
    Each <tag> carries the name of its enclosing IFD, the tag ID and the
    tag name as attributes, and the raw value as its text.
    """
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]

    # Stack of (layer, ifd_name) for the IFDs enclosing the current
    # position.  The layer is stored with each name so that the stack stays
    # correct when tags are nested below a directory line that ENTRY_RE
    # does not recognise.
    ifds = []

    for entry, tag_prefix, ifd_line, ifd_prefix in ENTRY_RE.findall(text):
        if ifd_line:
            # IFD indicator: its tags live one layer below its own prefix.
            layer = len(ifd_prefix) // 2 + 1
            while ifds and ifds[-1][0] >= layer:
                ifds.pop()
            # Skip the prefix and "+ [" to reach the directory name.
            ifd_name = ifd_line[len(ifd_prefix) + 3:].split()[0]
            ifds.append((layer, ifd_name))
            continue

        layer = len(tag_prefix) // 2
        # Leave every IFD that is nested deeper than this tag.
        while ifds and ifds[-1][0] > layer:
            ifds.pop()
        # A tag seen before any IFD indicator gets an empty ifd attribute.
        ifd_name = ifds[-1][1] if ifds else ""

        # The entry always ends with "0x" plus four hex digits (see
        # ENTRY_RE).  Taking the ID from the end avoids mistaking a
        # hex-looking value for the ID.
        tag_id = entry[-6:]

        name = _NAME_RE.search(entry).group(0).split()[1]
        value = _tag_value(name, entry)

        lines.append(' <tag ifd="%s" id="%s" name="%s">%s</tag>'
                     % (_attr(ifd_name), tag_id, _attr(name), escape(value)))

    lines.append("</exif>")
    return "\n".join(lines) + "\n"


def main():
    """Read exiftool -v2 output from stdin and write the XML to stdout."""
    sys.stdout.write(convert(sys.stdin.read()))


if __name__ == "__main__":
    main()