#!/usr/bin/python
# This is a second-cut code-to-HTML converter.
#
# Unlike the other modules I've looked at, it makes a rudimentary attempt
# to cross-reference variables (eventually it will need to be two-pass).
# It replaces the first cut's overburdened pile of regexps with the standard
# tokenize library.
# Future directions include more literate-programming-oriented features,
# like taking block comments [like this] and formatting them more smartly,
# or perhaps even extracting the __doc__ value. There could even be a
# sidebar with notes that are somehow tagged as less-inline than normal
# comments.
#
# Copyright 2003 Mark W. Eichin <eichin@thok.org> The Herd Of Kittens
#
import sys
import tokenize
import re
from htmlentitydefs import entitydefs
# Build an inverse of entitydefs: map every 8-bit character to itself,
# then override those that have a single-character entity definition.
invdefs = {}
for i in range(0, 256):
    c = "%c" % i
    invdefs[c] = c
for k, v in entitydefs.items():
    if len(v) == 1:
        invdefs[v] = "&%s;" % k
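# A couple of worked entries from the table above (a sketch; "lt" and "amp"
# come straight from htmlentitydefs.entitydefs):
#   invdefs["<"] == "&lt;"
#   invdefs["&"] == "&amp;"
#   invdefs["x"] == "x"    # non-entity characters map to themselves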
def make_one_url(s):
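    """Wrap a bare http URL in a self-referential anchor tag.

    A doctest sketch of the intended behavior:
    >>> make_one_url("http://thok.org")
    '<a href="http://thok.org">http://thok.org</a>'
    >>> make_one_url("not a url")
    'not a url'
    """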
if s.startswith("http://"):
return '<a href="%s">%s</a>' % (s, s)
return s
def urlify(s):
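    """Split s around http URLs and anchor-wrap each one via make_one_url.

    A doctest sketch:
    >>> urlify("see http://thok.org for more")
    'see <a href="http://thok.org">http://thok.org</a> for more'
    """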
    parts = re.split(r"(http://[^\s&]*)", s)
return "".join([make_one_url(p) for p in parts])
def httpquote(s):
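    """Escape HTML metacharacters through the inverse entity table.

    A doctest sketch:
    >>> httpquote("a < b & b > c")
    'a &lt; b &amp; b &gt; c'
    """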
return "".join([invdefs[v] for v in s])
def span_token(tag, txt):
    return """<span class="python-%s">%s</span>""" % (tag, txt)
def link_target_wrap(s, name):
return '<a name="%s">%s</a>' % (name, s)
def link_ref_wrap(s, name):
return '<a href="#%s">%s</a>' % (name, s)
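# Cross-reference sketch: a definition site gets a target, a use site a
# reference pointing back at it:
#   link_target_wrap("sys", "sys") -> '<a name="sys">sys</a>'
#   link_ref_wrap("sys", "sys")    -> '<a href="#sys">sys</a>'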
# should these be in formatter? probably
known_names = {}
known_imports = {}
known_aliases = {}
class formatter:
def __init__(self, outstream):
# we may not need this anymore
self.st = {"string":("outside",None)}
self.outstream = outstream
self.next_name = None
self.lastcol = 0
self.lastrow = 1
self.indent_list = [""]
self.please_indent = 1
def emit(self, txt):
self.outstream.write(txt)
def COMMENT(self, tstring):
self.emit(span_token("comment", urlify(httpquote(tstring))))
def NL(self, tstring):
self.emit(tstring)
self.please_indent = 1
self.next_name = None
def NEWLINE(self, tstring):
self.emit(tstring)
self.please_indent = 1
self.next_name = None
def passthrough(self, tstring):
self.emit(tstring)
self.next_name = None
OP = passthrough
NUMBER = passthrough
ENDMARKER = passthrough
def do_indent(self):
# self.emit("".join(self.indent_list))
        self.emit(self.indent_list[-1]) # use the innermost (most recent) indent
self.please_indent = None
def INDENT(self, tstring):
self.indent_list.append(tstring) # push
self.next_name = None
self.do_indent()
def DEDENT(self, tstring):
self.indent_list = self.indent_list[:-1] # pop
self.next_name = None
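    # Indent-stack sketch: for "if x:\n    if y:\n        pass\n", tokenize
    # emits INDENT("    "), then INDENT("        "), then two DEDENTs, so
    # indent_list goes [""] -> ["", "    "] -> ["", "    ", "        "] and
    # back; each INDENT string is the full leading whitespace, which is why
    # do_indent emits only the innermost entry rather than the whole stack.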
def STRING(self, tstring):
self.emit(span_token("quote", httpquote(tstring)))
self.next_name = None
def NAME(self, tstring):
if self.next_name:
return self.next_name(tstring)
try:
fn = getattr(self, "NAME_%s" % tstring)
        except AttributeError:
            # no NAME_<keyword> handler; try other context stuff?
self.emit(tstring)
return
fn(tstring)
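    # Dispatch sketch: NAME("import") finds NAME_import via getattr and is
    # rendered as a verb; NAME("foo") has no NAME_foo handler and falls
    # through to plain emission (unless a next_name hook is armed).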
def NAME_import(self, tstring):
self.next_name = self.import_pkg_name
self.emit(span_token("verb", tstring))
def NAME_from(self, tstring):
self.next_name = self.import_pkg_name_from
self.emit(span_token("verb", tstring))
def import_pkg_name(self, tstring):
        # not the right test (a name with underscores fails isalnum, for one)
if tstring.isalnum():
known_imports[tstring] = 1
self.emit(span_token("import", link_target_wrap(tstring,tstring)))
else:
self.emit(tstring)
self.next_name = None
    def import_pkg_name_from(self, tstring):
        # not the right test (a name with underscores fails isalnum, for one)
        if tstring.isalnum():
            known_imports[tstring] = 1
            self.emit(span_token("import", link_target_wrap(tstring, tstring)))
        else:
            self.emit(tstring)
        self.next_name = None
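    # Flow sketch for "from os import path": NAME("from") arms
    # import_pkg_name_from, which marks "os" as an import target; the
    # following NAME("import") then hits NAME_import and re-arms
    # import_pkg_name, which marks "path" the same way.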
# need a way to insert missing whitespace
# possibly less kludgily than counting offsets.
# maybe we just need to use the offsets to mark things up?
def process_token(self, tk):
ttype, tstring, spos, epos, line = tk
# print tokenize.tok_name[ttype]
# print tokenize.tok_name[ttype], tk
# dispatch on token type, emit appropriately, have a fallback
# add a summary of non-specially-handled ones
# handle the space...
thisrow, thiscol = spos
# print spos, epos
if thisrow > self.lastrow:
self.outstream.write("\n" * (thisrow - self.lastrow - 1))
self.lastcol = 0
if thiscol > self.lastcol:
# print >>self.outstream, [thisrow, thiscol, self.lastrow, self.lastcol],
self.outstream.write(" " * (thiscol - self.lastcol))
self.please_indent = None
self.lastrow, self.lastcol = epos
try:
fn = getattr(self, tokenize.tok_name[ttype])
except AttributeError:
print >>sys.stderr, "No match!", tokenize.tok_name[ttype], tstring
return
if ttype != tokenize.DEDENT and ttype != tokenize.INDENT and self.please_indent:
self.do_indent()
fn(tstring)
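# Token sketch: for the source line "x = 1\n", generate_tokens yields
# roughly (NAME, 'x', (1, 0), (1, 1), ...), (OP, '=', (1, 2), (1, 3), ...),
# (NUMBER, '1', (1, 4), (1, 5), ...), (NEWLINE, '\n', (1, 5), (1, 6), ...);
# process_token rebuilds the spaces around '=' from the gap between one
# token's epos and the next token's spos.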
def format_stream(instream, outstream):
print >>outstream, """<pre class="python-src">"""
fmt = formatter(outstream)
for tk in tokenize.generate_tokens(instream.readline):
fmt.process_token(tk)
print >>outstream, """</pre>"""
CSS = """<style type="text/css" media=screen>
<!--
span.python-verb { color: cyan; }
span.python-comment { color: red; }
span.python-quote { color: orange; }
span.python-token { color: blue; }
span.python-name { color: green; }
span.python-import { color: green; }
span.python-alias { color: green; }
span.python-noun { color: brown; }
pre.python-src { color: grey; background: black; }
h1 { color: yellow; background: grey; }
body { color: yellow; background: grey; }
-->
</style>
"""
if __name__ == "__main__":
    progname, infile = sys.argv
    f = open(infile)
    print """<html><head><title>%s</title></head>""" % infile
    print CSS
    print """<body>%s<h1>%s</h1>""" % (
        '<a href="index.html">[back]</a>',
        '<a href="%s">%s</a>' % (infile, infile))
format_stream(f, sys.stdout)
print """</body></html>"""