#!/usr/bin/python import os import time import apsw import Image import StringIO import string from collections import namedtuple # imports from flickr_locations.py which builds the same way import xmlrpclib import urllib from flickr_post import sign_method, _get_auth_token, _api_key, flickr_xmlrpc_base, upload_one # enough multiple-ElementTree import nonsense :-) import xml.etree.ElementTree as etree # direct cropping on flickr: # look for a cropr_me tag # (first version: start from a picture url) # http://www.flickr.com/services/api/flickr.photos.search.html # http://www.flickr.com/services/api/flickr.photos.recentlyUpdated.html # actually fetch tags,url_o,url_m; we *want* notes but probably can't get them # (could pass them; they're ignored) def get_recently_updated(extras=["tags"]): fpru_args = sign_method(method="flickr.photos.recentlyUpdated", min_date = str(int(time.time()) - 5*24*60*60), extras=",".join(extras), auth_token=_get_auth_token(), api_key=_api_key) prox = xmlrpclib.ServerProxy(flickr_xmlrpc_base) fpru_text = prox.flickr.photos.recentlyUpdated(fpru_args) # failed on Stra\xdfe (Strasse) in a description # with UnicodeEncodeError: 'ascii' codec can't encode character u'\xdf' in position 5374: ordinal not in range(128) # fpru_args was already Unicode, fromstring takes "text"... fpru_resp = etree.fromstring(fpru_text.encode("utf-8")) # ignore pages/perpage until later for child in fpru_resp.getchildren(): yield child def find_cropr_tagged_photos(): for photo in get_recently_updated(extras=["tags", "url_m", "url_o", "description"]): tags = set(photo.get("tags").split(" ")) if "croprme" not in tags: # normalized form! even if I wrote cropr_me; need photo info tags.tag[raw] # print photo.get("id"), tags continue yield photo # then look for a cropr_me *note* # http://www.flickr.com/services/api/flickr.photos.getInfo.html # photo.notes.note.text(); check author= (vs photo.owner.nsid?), keep id=, use x/y/w/h # also save photos.title for the later one # photos.tags.tag might be a good starting point too # preserve photo[license], save photo[rotation], photo.visibility, # photo.dates[taken], photo.permissions, photo.editability # and photo.urls.url(type=photopage).text() # http://www.flickr.com/services/api/flickr.photos.getPerms.html (for setPerms below) # http://www.flickr.com/services/api/flickr.photos.geo.getLocation.html # should use a memoize decorator; mainly saving so we can call it for free inside get_cropr_note_coords def get_info(photo_id, cache={}): if photo_id in cache: return cache[photo_id] gi_args = sign_method(method="flickr.photos.getInfo", photo_id=str(photo_id), auth_token=_get_auth_token(), api_key=_api_key) prox = xmlrpclib.ServerProxy(flickr_xmlrpc_base) gi_text = prox.flickr.photos.getInfo(gi_args) gi_resp = etree.fromstring(gi_text) # ignore pages/perpage until later cache[photo_id] = gi_resp return gi_resp # for debugging purposes: # del auto_cropr.get_info.func_defaults[0][int(photo_id)] # note_coords = namedtuple('note_coords', 'x y w h', rename=True) # (but we don't get rename until python 3.1) # (once we've checked, we don't care about author or authorname) note_coords = namedtuple('note_coords', 'x y w h id') def note_coords_from_dict(d): # they happen to be all numeric, including id, so cheat a little return dict([(k, int(v)) for k,v in d.iteritems() if k in note_coords._fields]) def pil_box_from_note_coords(coo): # "a 4-tuple defining the left, upper, right, and lower pixel coordinate." # 1.1.7-2build2 (natty) complains "integer argument expected, got float" # when it actually executes the crop... return map(int, (coo.x, coo.y, coo.x+coo.w, coo.y+coo.h)) def notes_edit(coords, text): ne_args = sign_method(method="flickr.photos.notes.edit", note_id=str(coords.id), note_x=str(coords.x), note_y=str(coords.y), note_w=str(coords.w), note_h=str(coords.h), note_text=text, auth_token=_get_auth_token(), api_key=_api_key) prox = xmlrpclib.ServerProxy(flickr_xmlrpc_base) ne_text = prox.flickr.photos.notes.edit(ne_args) if not ne_text.strip(): # all good "empty success response if it completes without error." return # otherwise it should have thrown xmlrpclib.Fault... raise ValueError("non-blank return <%s> from notes_edit" % ne_text) def get_cropr_note_coords(photo_id): gi = get_info(photo_id) for note in gi.find("notes").findall("note"): if gi.find("owner").attrib["nsid"] != note.attrib["author"]: # note by someone else continue if note.text != "cropr_me": # TODO: we could accept other things, but it doesn't allow "blank" # maybe accept "add your note text here"? continue coords = note_coords(**note_coords_from_dict(note.attrib)) yield coords # download "original" # http://www.flickr.com/services/api/flickr.photos.getSizes.html # size label="Original" source=*.jpg; maybe grab which ever other one satisfies # "Coordinates and sizes are in pixels, based on the 500px image size shown on individual photo pages." # (is that always label="Medium"?) image_sizes = namedtuple('image_sizes', 'width height label source url media') def get_sizes(photo_id): gs_args = sign_method(method="flickr.photos.getSizes", photo_id=str(photo_id), auth_token=_get_auth_token(), api_key=_api_key) prox = xmlrpclib.ServerProxy(flickr_xmlrpc_base) gs_text = prox.flickr.photos.getSizes(gs_args) # # (obey that, later) gs_resp = etree.fromstring(gs_text) for size in gs_resp.findall("size"): yield image_sizes(**size.attrib) def get_original_image(url): # use pycurl or other cleverness later - now just get the bits # esp for testing, look for a cached version! picture_cache = apsw.Connection(os.path.expanduser("~/.auto_cropr.db")) cache_cursor = picture_cache.cursor() cache_cursor.execute("create table if not exists cache(url, imagedata)") for (imagedata,) in cache_cursor.execute("select imagedata from cache where url=?", (url,)): return imagedata imagedata = urllib.urlopen(url).read() if len(imagedata) < 8000: # TODO: *actually* get the status and fail! print "bad image from real get_original_image(%s):" % url, imagedata raise Exception("Invalid image from %s" % url) cache_cursor.execute("insert into cache values(:url, :imagedata)", dict(url=url, imagedata=buffer(imagedata))) return imagedata # rotate # crop the right part # rotate back # (actually, how about transforming the coords into orig-space, and rotating after upload) # copy/hack metadata # (test version: stop here, view the result) # /usr/share/doc/python-pyexiv2/ for the non-flickr stuff? # upload # (flickr_post.upload_one) # replace original note with a link to the new picture # http://www.flickr.com/services/api/flickr.photos.notes.edit.html def replace_note_text(photo_id, note_id, new_text): for coords in get_cropr_note_coords(photo_id): if coords.id == note_id: notes_edit(coords, new_text) return raise IndexError("No matching note for %s in %s" % (note_id, photo_id)) # remove cropr_me tag from original picture # http://www.flickr.com/services/api/flickr.photos.removeTag.html def remove_tag(tag_id): rt_args = sign_method(method="flickr.photos.removeTag", tag_id=tag_id, auth_token=_get_auth_token(), api_key=_api_key) prox = xmlrpclib.ServerProxy(flickr_xmlrpc_base) rt_text = prox.flickr.photos.removeTag(rt_args) if not rt_text.strip(): # all good "empty success response if it completes without error." return # otherwise it should have thrown xmlrpclib.Fault... raise ValueError("non-blank return <%s> from remove_tag" % rt_text) def remove_cropr_tag(photo_id): gi = get_info(photo_id) for tag in gi.find("tags").findall("tag"): # *could* check tag.attrib["raw"] but that's not consistent with how we search if tag.text == "croprme": # tag.id is long and includes the photo_id remove_tag(tag.attrib["id"]) print "removed", tag.attrib["id"] # put link to original caption in caption (with crop coords) # http://www.flickr.com/services/api/flickr.photos.setMeta.html (title+description) # fix metadata # http://www.flickr.com/services/api/flickr.photos.setDates.html (date_taken) def set_date_taken(photo_id, date_taken, granularity): # should have optional date_posted, but we trust flickr for that sd_args = sign_method(method="flickr.photos.setDates", photo_id=str(photo_id), date_taken=date_taken, date_taken_granularity=granularity, auth_token=_get_auth_token(), api_key=_api_key) prox = xmlrpclib.ServerProxy(flickr_xmlrpc_base) sd_text = prox.flickr.photos.setDates(sd_args) if not sd_text.strip(): # all good "empty success response if it completes without error." return # otherwise it should have thrown xmlrpclib.Fault... raise ValueError("non-blank return <%s> from set_date_taken" % sd_text) # http://www.flickr.com/services/api/flickr.photos.setPerms.html # (is_public, is_friend, is_family, perm_comment, perm_addmeta) # http://www.flickr.com/services/api/flickr.photos.setSafetyLevel.html # (safety_level, hidden - from photo.visibility) # http://www.flickr.com/services/api/flickr.photos.setTags.html (handled by uploader) # http://www.flickr.com/services/api/flickr.photos.geo.setLocation.html def geo_set_location(photo_id, lat, lon, accuracy, context): gsl_args = sign_method(method="flickr.photos.geo.setLocation", photo_id=str(photo_id), lat=lat, lon=lon, accuracy=accuracy, context=context, auth_token=_get_auth_token(), api_key=_api_key) prox = xmlrpclib.ServerProxy(flickr_xmlrpc_base) gsl_text = prox.flickr.photos.geo.setLocation(gsl_args) if not gsl_text.strip(): # all good "empty success response if it completes without error." return # otherwise it should have thrown xmlrpclib.Fault... raise ValueError("non-blank return <%s> from geo_set_location" % gsl_text) # per http://www.flickr.com/groups/api/discuss/72157616713786392/ base58_chars = string.digits + string.lowercase + string.uppercase base58_chars = base58_chars.replace("0", "") base58_chars = base58_chars.replace("O", "") base58_chars = base58_chars.replace("I", "") base58_chars = base58_chars.replace("l", "") assert len(base58_chars) == 58 def flickr_short_url(photo_id): # http://flic.kr/p/{base58-photo-id} num = int(photo_id) rep = "" while num: num, digit = divmod(num, len(base58_chars)) rep += base58_chars[digit] return "http://flic.kr/p/" + rep[::-1] def do_everything(): """does everything, as me, for my account...""" for photo in find_cropr_tagged_photos(): photo_id = photo.get("id") photo_info = get_info(photo_id) photo_rot = photo_info.attrib["rotation"] # turns out notes are in orig-space, so we just copy the rotation through! for coords in get_cropr_note_coords(photo_id): sizes = dict(((size.label, size) for size in get_sizes(photo_id))) assert sizes["Medium"].width == "500" or sizes["Medium"].height == "500" assert "Original" in sizes # TODO: handle rotation here? scale_x = float(sizes["Original"].width)/float(sizes["Medium"].width) scale_y = float(sizes["Original"].height)/float(sizes["Medium"].height) print "scale by", scale_x, scale_y print "note:", coords new_coords = note_coords(x=coords.x*scale_x, y=coords.y*scale_y, w=coords.w*scale_x, h=coords.h*scale_y, id=coords.id) print "image:", new_coords img_bytes = get_original_image(sizes["Original"].source) print len(img_bytes) if len(img_bytes) < 8000: print "ERR:", sizes["Original"].source, img_bytes # http://www.pythonware.com/library/pil/handbook/image.htm tells us # to use Image.open on a StringIO... pil_image = Image.open(StringIO.StringIO(img_bytes)) print "old format", pil_image.format cropped_image = pil_image.crop(pil_box_from_note_coords(new_coords)) print "crop size", cropped_image.size # figure out JFIF thumbnails later, if at all cropped_image.show() new_bytes = StringIO.StringIO() cropped_image.save(new_bytes, pil_image.format) new_bytes.seek(0) photopage_url = photo_info.find("urls").find("url").text old_tags = [tag.attrib["raw"] for tag in photo_info.find("tags").findall("tag") if tag.text != "croprme"] # replaces: raw_tags=photo.get("tags"), new_photo_id = upload_one(photopath=photo.get("title"), # my convention... photobytes=new_bytes.getvalue(), title=photo.get("title") + " note-cropped", description=(photo.find("description").text or "") + " (cropped from " + photopage_url + " )", tags=old_tags, public=True, # friends=, # family=, fake=False, rotate=photo_rot, # coords=, async=False) # implement those later... print "new photo id", new_photo_id print "short url", flickr_short_url(new_photo_id) # clean up replace_note_text(photo_id, coords.id, flickr_short_url(new_photo_id)) # fix other metadata set_date_taken(new_photo_id, photo_info.find("dates").attrib["taken"], photo_info.find("dates").attrib["takengranularity"]) if photo_info.find("location"): oldloc = photo_info.find("location").attrib geo_set_location(new_photo_id, oldloc["latitude"], oldloc["longitude"], oldloc["accuracy"], oldloc["context"]) # only remove the cropr_me tag *once* remove_cropr_tag(photo_id) if __name__ == "__main__": do_everything()