#!/usr/bin/python

# stash reader code.  stash saver stuff should probably move here
# from page_saver_server.py eventually...


import re
import rfc822
import os

def listpaths(d):
    """Generate the entries of directory d as paths joined onto d.

    Same entries, in the same order, as os.listdir(d); only the form of
    each item differs (os.path.join(d, name) rather than the bare name).
    """
    join = os.path.join
    for entry in os.listdir(d):
        full = join(d, entry)
        yield full

def listboth(d):
    """Generate (name, path) pairs for every entry of directory d.

    name is the bare entry as reported by os.listdir(d); path is that
    same entry joined onto d.
    """
    names = os.listdir(d)
    for name in names:
        yield (name, os.path.join(d, name))

class Stash:
    """Stash of webclips.

    A stash is a directory tree laid out as
    <base>/yyyy-mm/yyyy-mm-dd/<name>.clip.  Directories and files that
    do not fit that layout are ignored.
    """

    # Both patterns are anchored with \Z.  items() rebuilds directory
    # paths from the (yyyy, mm, dd) parts that dates() yields, so a name
    # that merely *starts* with a date (e.g. "2005-01-backup") cannot be
    # round-tripped and must not be reported.
    _MONTH_RE = re.compile(r"(\d\d\d\d)-(\d\d)\Z")
    _DAY_RE = re.compile(r"(\d\d\d\d)-(\d\d)-(\d\d)\Z")

    def __init__(self, basedir):
        """A stash object is based on a top-level directory"""
        self.base = basedir

    def dates(self):
        """Yields all dates (yyyy,mm,dd as padded strings) in the stash.

        A date is reported for every directory <base>/yyyy-mm/yyyy-mm-dd
        whose day name carries the same yyyy-mm as its parent.  Order is
        whatever os.listdir produces.
        """
        for mo, pmo in listboth(self.base):
            if not os.path.isdir(pmo):
                continue
            if not self._MONTH_RE.match(mo):
                continue
            for dy, pdy in listboth(pmo):
                if not os.path.isdir(pdy):
                    continue
                m = self._DAY_RE.match(dy)
                if not m:
                    continue
                # A day filed under a different month would make items()
                # look in <base>/<day's yyyy-mm>/..., i.e. the wrong place.
                if not dy.startswith(mo + "-"):
                    continue
                yyyy, mm, dd = m.groups()
                # TODO: decide if we should check if pdy is empty first
                # (or that it's implicit in the construction side)
                yield yyyy, mm, dd

    def items(self):
        """yields paths to all objects in stash [use clips() instead]

        Only regular files whose name ends in ".clip" are reported.
        """
        for y, m, d in self.dates():
            daydir = os.path.join(self.base,
                                  "%s-%s" % (y, m),
                                  "%s-%s-%s" % (y, m, d))
            for clip, pclip in listboth(daydir):
                if not clip.endswith(".clip"):
                    continue
                if not os.path.isfile(pclip):
                    continue
                yield pclip

    class Clip:
        """A single clipping from a stash.

        The clip file is parsed as an RFC 822 style message on first use;
        the accessors below return individual header fields.  Accessors
        use subscript lookup, so a clip lacking the requested header
        raises KeyError.
        """

        def __init__(self, p):
            """wrap ourselves around a path"""
            self._path = p
            self._cached_msg = None

        def _fetch(self):
            """internal caching helper: parse the clip file at most once"""
            # Compare against None explicitly: a message without any
            # headers has length zero and is therefore falsy, which would
            # otherwise cause the file to be re-read on every access.
            if self._cached_msg is None:
                fp = open(self.path(), "r")
                try:
                    # Headers are parsed while the message is constructed,
                    # so the file can be closed straight afterwards.
                    self._cached_msg = rfc822.Message(fp)
                finally:
                    fp.close()

        def categories(self):
            """Returns list of categories for this clip.

            The ClipCategory header is split on whitespace; runs of
            whitespace (or an empty header) produce no empty-string
            categories.
            """
            self._fetch()
            h = self._cached_msg["ClipCategory"]
            # fixup: this one category was recorded with a space in it
            if h == "input devices":
                h = "input_devices"
            return h.split()

        def title(self):
            """Returns the title of the clipped page"""
            self._fetch()
            return self._cached_msg["ClipTitle"]

        def url(self):
            """Returns the url of the clipped page"""
            self._fetch()
            return self._cached_msg["ClipURL"]

        def referrer(self):
            """Returns the referrer of the clipped page"""
            self._fetch()
            return self._cached_msg["ClipReferrer"]

        def selection(self):
            """Returns the selection from the clipped page"""
            self._fetch()
            # these should probably be automatic or implicit, but
            # categories certainly needs fixups, and selection
            # probably does too
            return self._cached_msg["ClipSelection"]

        def path(self):
            """Returns path of this clip, for deeper abuse"""
            return self._path

        def original_page(self):
            """TODO: Returns the original page, in some form

            Raises NotImplementedError until that is written.
            """
            # An explicit raise, unlike assert, still fires under python -O.
            raise NotImplementedError("original_page not implemented")

    def clips(self):
        """Yields every clip in the stash, as Clip objects"""
        for pclip in self.items():
            # is there a good reason to pick self.Clip vs. Stash.Clip?
            yield self.Clip(pclip)

    def tags(self):
        """Returns a dict of tags and their popularities

        Keys are category names, values the number of clips carrying them.
        """
        tags = {}
        for c in self.clips():
            for cat in c.categories():
                tags[cat] = tags.get(cat, 0) + 1
        return tags

    def clips_by_tag(self, *tags):
        """yields the clips that match all given tags

        With no tags given every clip is yielded, without its file being
        read.
        """
        wanted = set(tags)
        for c in self.clips():
            # categories() is consulted once per clip rather than once
            # per requested tag.
            if wanted and not wanted.issubset(c.categories()):
                continue
            yield c

import sys


def main(argv):
    """Command-line entry point: report on the stash in ~/stufflog.

    argv is the full argument vector (program name first) and must hold
    exactly one verb:

      tagpop        print every tag with its clip count, most popular first
      trackforward  print the path of every clip tagged "trackforward"

    Any other argument list exits through sys.exit with the list of verbs
    as the message.
    """
    usage = "tagpop, trackforward"
    # Check the argument count explicitly; blindly unpacking sys.argv
    # dies with a ValueError traceback when the verb is missing or extra
    # arguments are supplied.
    if len(argv) != 2:
        sys.exit(usage)
    verb = argv[1]
    if verb not in ("tagpop", "trackforward"):
        sys.exit(usage)

    clipstash = Stash(os.path.expanduser("~/stufflog"))
    if verb == "tagpop":
        t = clipstash.tags()
        # Descending by count; sorted() is stable, so ties keep the
        # order in which the dict produced them.
        for tag, count in sorted(t.items(), key=lambda item: -item[1]):
            print("%d: %s" % (count, tag))
    else:
        for c in clipstash.clips_by_tag("trackforward"):
            print(c.path())


if __name__ == "__main__":
    main(sys.argv)