#!/usr/bin/python
#
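# Takes one or more directories on the command line and walks each one
# recursively, looking for .capt files.  The matching .xml file is
# (re)generated when it is missing, or when it is older than the .capt
# file or than this script itself.
# NOTE: a plain file argument is not handled by the current code; every
# argument is passed to os.listdir(), which fails on a non-directory.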
#
from __future__ import generators

import os
import sys
import re

# like os.listdir but gives the full path
def full_listdir(dirname):
    """Return a list of the entries of `dirname`, each prefixed with `dirname`.

    The entries come from os.listdir() and are joined onto `dirname` with
    os.path.join(), in the order os.listdir() reports them.
    """
    paths = []
    for entry in os.listdir(dirname):
        paths.append(os.path.join(dirname, entry))
    return paths

def all_listdir(dirname):
    """Generate the path of every file found under `dirname`, recursively.

    Anything os.path.isfile() accepts is yielded.  Of the remaining
    entries, symbolic links are skipped, real directories are descended
    into, and everything else is ignored.
    """
    for path in full_listdir(dirname):
        if os.path.isfile(path):
            yield path
            continue
        # The link test comes before the directory test, so a symlink that
        # points at a directory is never followed.
        if os.path.islink(path) or not os.path.isdir(path):
            continue
        for nested in all_listdir(path):
            yield nested

def xmlquote(s):
    """Return `s` with the XML markup characters escaped.

    ``&`` becomes ``&amp;``, ``<`` becomes ``&lt;`` and ``>`` becomes
    ``&gt;``.  Quote characters are left alone, so the result is meant
    for element content rather than attribute values.
    """
    # '&' has to be replaced first; otherwise the ampersands introduced by
    # the other two entities would themselves be escaped.
    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

def capt_to_xml(captname, xmlname):
    """Read the caption file `captname` and write the XML file `xmlname`.

    Every non-blank line of the caption file must look like
    ``Name: value`` (a run of non-whitespace characters, a colon, one
    space, then the value).  Each pair becomes one ``<field name="...">``
    element inside ``<image><description>``, with the name lower-cased
    and the value passed through xmlquote(); empty ``<bins>`` and
    ``<exif>`` elements follow the description.  When a name occurs more
    than once, the last value wins.

    Raises ValueError if a non-blank line does not have the
    ``Name: value`` form.  The caption file is parsed completely before
    `xmlname` is opened, so an existing XML file is left untouched in
    that case.
    """
    capt = open(captname, "r")
    try:
        buf = capt.read()
    finally:
        # Close explicitly instead of relying on garbage collection.
        capt.close()

    field_re = re.compile(r"^(\S+): (.*)$")
    fields = {}
    for line in buf.splitlines():
        if not line.strip():
            continue
        # capt lines are explicitly iso8859-1
        match = field_re.match(line.decode("iso8859-1"))
        if match is None:
            # Report the offending line rather than failing with an
            # AttributeError on None.
            raise ValueError(
                "%s: malformed caption line (expected 'Name: value'): %r"
                % (captname, line))
        name, value = match.groups()
        fields[name] = value

    # xml is explicitly UTF-8, though
    # NOTE(review): the field name is written into the attribute without
    # escaping; confirm caption names never contain quotes or markup.
    output = "\n".join(['<?xml version="1.0" encoding="UTF-8"?>',
                        '<image><description>']
                       + ['<field name="%s">%s</field>'
                          % (k.lower().encode("utf-8"),
                             xmlquote(v).encode("utf-8"))
                          for k, v in fields.items()]
                       + ['</description>',
                          '<bins></bins>',
                          '<exif></exif>',
                          '</image>'])

    xml = open(xmlname, "w")
    try:
        xml.write(output)
        xml.write("\n")
    finally:
        # Make sure the handle is released even if the write fails.
        xml.close()


def capt_dir(dirname):
    """Walk `dirname` and regenerate the .xml file of every stale .capt file.

    For each path ending in ".capt" found by all_listdir(), the matching
    ".xml" path is rebuilt with capt_to_xml() when it does not exist, when
    it is older than the .capt file, or when it is older than the file
    named by the module-level `progpath` (assigned in the __main__ block).
    """
    for captfile in all_listdir(dirname):
        if not captfile.endswith(".capt"):
            continue
        xmlfile = re.sub(r"\.capt$", ".xml", captfile)
        # The xml is current only if it exists and is at least as new as
        # both the caption file and the program; otherwise rebuild it.
        if os.path.isfile(xmlfile):
            if os.path.getmtime(xmlfile) >= os.path.getmtime(captfile):
                if os.path.getmtime(xmlfile) >= os.path.getmtime(progpath):
                    continue
        sys.stdout.write("Updating" + " " + xmlfile + "\n")
        capt_to_xml(captfile, xmlfile)

if __name__ == "__main__":
    # capt_dir() reads the module-level name `progpath` to decide whether
    # an xml file is older than this program, so it must be set before
    # any directory is processed.
    progpath, targets = sys.argv[0], sys.argv[1:]
    for target in targets:
        capt_dir(target)