# capt_to_kimdaba.py

#!/usr/bin/python
import os
import sys
import elementtree.ElementTree
import kimdaba_album

# tool for finding old .capt or .xml files, slurping them into the
# kimdaba index.xml, and purging the old files.

# sample foo.jpg.xml looks like:
# <?xml version="1.0" encoding="UTF-8"?>
# <image><description>
# <field name="title">xmas wreath</field>
# </description>
# <bins></bins>
# <exif></exif>
# </image>
#
#   more complete form:
# <?xml version="1.0" encoding="UTF-8"?><image><description>
# <field name="location">Concord, MA</field>
# <field name="people"></field>
# <field name="description">Snow still on the ground in late march.</field>
# <field name="event"></field>
# <field name="title">Snow in the yard</field>
# </description>
# <bins></bins>
# <exif>
#       <tag name="MeteringMode">
#       </tag>
#       <tag name="JPEG_Type">
#          Baseline
#       </tag>
#    </exif>
# </image>

# <exif> is entirely stuff in the .jpg
# <bins> is stuff pushed back in later
#   so ignore both... but where is requested rotation stored???

# 
#
# foo.jpg.capt looks like:
# title: xmas wreath
#
# tags found in .capt files (tag names are mixed-case):
#  Description
#  Title
#  Event
#  Location
#  Mail (only one value, and not *about* the picture)
#  People (comma separated; "?" means unknown or partly unknown)
#
# (generate mappings in advance...)

def get_fields_from_xml(pathpart):
    print "GET:", pathpart
    xstuff = elementtree.ElementTree.ElementTree(file=pathpart + ".xml")
    # <field name="description">Snow still on the ground in late march.</field>
    fields = {}
    for field in xstuff.findall("description/field"):
        fields[field.get("name")] = field.text
    return fields
    
# Every field name a sidecar is allowed to carry; process_bins refuses
# a sidecar that contains any other name.
known_fields = set((
    "location",
    "people",
    "description",
    "event",
    "title",
    "mail",
))

# Maps title-cased people values from the old captions to the
# corresponding Persons option on the kimdaba side.
peoplemap = dict()

# get this from options/option off the top level...
# however, to get a clean run you can prime it with values here
known_people = set()
def load_known_people(options):
    """Collect the album's existing Persons values into `known_people`.

    options -- iterable of <option> elements (the caller passes
               album.findall("options/option"))

    Every <value value="..."/> found under an option named "Persons" is
    added to the module-level `known_people` set, which is then printed.
    """
    person_groups = [opt for opt in options if opt.get("name") == "Persons"]
    for group in person_groups:
        known_people.update([entry.get("value")
                             for entry in group.findall("value")])
    print(known_people)

def add_img_keyword(img, kind, value):
    """Record `value` in the `kind` option group of an image element.

    img   -- an <image> element from the album
    kind  -- option group name: "Keywords", "Persons" or "Locations"
             (anything else trips the assertion)
    value -- the keyword text to record

    The result under `img` has the shape
        <options><option name=KIND><value value=VALUE/></option></options>
    with the <options> and <option> elements created when missing.  A
    value already present in the group is not added a second time.
    Always returns True.
    """
    assert kind in set(["Keywords", "Persons", "Locations"])
    for optionset in img.findall("options/option"):
        if optionset.get("name") == kind:
            break
    else:
        # No group of this kind yet.  Keep hold of the element we create:
        # looking it up again with find("options/option") would return
        # the *first* <option>, which may belong to a different kind.
        options = img.find("options")
        if options is None:
            options = img.makeelement("options", dict())
            img.append(options)
        optionset = options.makeelement("option", dict(name=kind))
        options.append(optionset)
    for existing in optionset.findall("value"):
        if existing.get("value") == value:
            # already recorded; don't write a duplicate entry
            return True
    optionset.append(optionset.makeelement("value", dict(value=value)))
    return True

def add_img_person(img, person):
    """Record `person` in the image's "Persons" option group.

    Thin wrapper around add_img_keyword; returns whatever it returns.
    """
    kind = "Persons"
    return add_img_keyword(img, kind, person)

class converter:
    """Per-field converters from legacy caption fields to album data.

    convert() dispatches on the field name to a convert_<fieldname>
    classmethod.  Each of those takes the <image> element and the field
    text, and returns a true value when it considers the field handled;
    fields that are only reported (or deliberately dropped) return None.

    Messages are written with print(...) and one pre-formatted argument,
    which emits the same text as the Python 2 print statement.
    """

    @classmethod
    def convert(cls, img, fieldname, fieldvalue):
        """Convert one field; return a true value if `img` was updated.

        Raises KeyError when there is no convert_<fieldname> method.
        Blank values are skipped and reported as "nothing done".
        """
        methname = "convert_%s" % fieldname
        if not hasattr(cls, methname):
            raise KeyError("No converter for %s" % fieldname)
        # An empty element such as <field name="people"></field> parses
        # with text None.  Passing that on would crash convert_people
        # (None has no split) and store None as the description/label
        # attribute, so treat blank values as nothing to convert.
        if fieldvalue is None or not fieldvalue.strip():
            return False
        return getattr(cls, methname)(img, fieldvalue)

    @classmethod
    def convert_location(cls, img, fieldvalue):
        """Report the location; nothing is stored on `img`."""
        print("WHERE: %s" % fieldvalue)

    @classmethod
    def convert_people(cls, img, fieldvalue):
        """Add each comma-separated person to the image's Persons group.

        Names are stripped and title-cased, then looked up in `peoplemap`
        (mapped name is added) and `known_people` (name is added as is).
        Anything else is only reported with a "WHO:" line.
        """
        if fieldvalue == "sassafras foxy":
            # special case: this exact value gets the comma it lacks
            fieldvalue = "sassafras, foxy"
        for person in fieldvalue.split(","):
            person = person.strip().title()
            if not person:
                # stray or trailing comma; there is no name to look up
                continue
            if person in peoplemap:
                add_img_person(img, peoplemap[person])
            elif person in known_people:
                add_img_person(img, person)
            else:
                print("WHO: %s" % person)
        # NOTE: still reports success when some names were only printed
        # (the original author's open question: "only if not printed?").
        return True

    @classmethod
    def convert_description(cls, img, fieldvalue):
        """Store the text as the image's "description" attribute."""
        img.set("description", fieldvalue)
        return True

    @classmethod
    def convert_event(cls, img, fieldvalue):
        """Report the event; nothing is stored on `img`."""
        print("EVENT: %s" % fieldvalue)
        # sometimes this appends to description...

    @classmethod
    def convert_title(cls, img, fieldvalue):
        """Store the text as the image's "label" attribute."""
        img.set("label", fieldvalue)
        return True

    @classmethod
    def convert_mail(cls, img, fieldvalue):
        """Discard the mail field on purpose."""
        # the mail tag was a hint that one of the tools should
        # *send it as email* to that person, not that it was anything
        # about the picture, so just discard it explicitly.
        return

def convert_fields(img, fields):
    """Run every entry of `fields` through converter.convert for `img`.

    fields -- dict of field name -> field text

    Returns a true value as soon as any converter reported one, otherwise
    a false value.
    """
    changed = False
    for name, text in fields.items():
        outcome = converter.convert(img, name, text)
        changed = outcome or changed
    return changed


def process_bins(workdir, fake=False):
    """Process any bins/capt remnants into the xml file."""
    # grab the filenames, then walk the xml
    has_xml = set([os.path.join(workdir, f.replace(".xml", "", 1))
                   for f in os.listdir(workdir) 
                   if f.endswith(".jpg.xml")])
    albumfile = kimdaba_album.kimdaba_default_album()
    album = kimdaba_album.parse(albumfile)
    load_known_people(album.findall("options/option"))
    did_something = False
    nuke_remnants = []
    for img in album.findall("images/image"):
        pathpart = img.get("file")
        if pathpart in has_xml:
            fields = get_fields_from_xml(pathpart)
            # make sure we don't have anything strange...
            unkfields = set(fields) - known_fields
            if unkfields:
                print "Unknown fields", unkfields, "in", pathpart
                raise Exception("Bad Field %s" % unkfields)
            # mapper class?            
            if convert_fields(img, fields):
                did_something =  True
                nuke_remnants.append(pathpart)
    if did_something:
        if not fake:
            kimdaba_album.safe_replace(albumfile, album)
            for remnant in nuke_remnants:
                os.remove(remnant + ".capt")
                os.remove(remnant + ".xml")
            print "NOW re-run kimdaba, to get the new keywords in the menu"
        else:
            print "diff -u", albumfile, "/tmp/c2k.xml"
            album.write("/tmp/c2k.xml")
            for remnant in nuke_remnants:
                print "rm", remnant, ".capt/.xml"
            

if __name__ == "__main__":
    # usage: capt_to_kimdaba.py [--no-act] workpath
    args = sys.argv[1:]
    no_act = "--no-act" in args
    args = [arg for arg in args if arg != "--no-act"]
    if len(args) != 1:
        # say what is expected instead of dying on a tuple-unpack error
        sys.exit("usage: %s [--no-act] workpath" % sys.argv[0])
    workpath = args[0]

    # workpath should be the yeardir; we should be in
    # the dir with index.xml when we run this
    process_bins(workpath, fake=no_act)