#!/usr/bin/env python

"""
jemdoc: light markup.

version 0.2.0, October 2007.
"""

# Copyright (C) 2007 Jacob Mattingley.
#
# This file is part of jemdoc.
#
# jemdoc is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3 of the License, or (at your option) any later
# version.
#
# jemdoc is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

import sys
import re
import time
import StringIO

# better checking of arguments?
def showhelp():
    print """Usage: jemdoc [OPTIONS] [SOURCEFILE] 
Produces html markup from a jemdoc SOURCEFILE.

Most of the time you can use jemdoc without any additional flags.
For example, typing

    jemdoc index.jemdoc

will produce an index.html, using a default configuration.  You can
change the output file by using -o OUTFILE, for example

    jemdoc -o html/main.html index.jemdoc

Some configuration options can be overridden by specifying a
configuration file.  You can use

    jemdoc --show-config

to print a sample configuration file (which includes all of the
default options). Any or all of the configuration [blocks] can be
overwritten by including them in a configuration file, and running,
for example,

    jemdoc -c mywebsite.conf index.jemdoc 

See http://jemdoc.jaboc.net/ for more details.
"""

def standardconf():
    """Return the built-in default configuration as a single string.

    The text is a sequence of ``[blockname]`` headers, each followed by the
    markup for that block and closed by a blank line.  Lines beginning with
    ``#`` are comments.  The markers ``|``, ``|1`` and ``|2`` are the places
    where hb() substitutes content when a block is written out.
    """
    defaults = """[firstbit]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta name="generator" content="jemdoc, see jaboc.net" />
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link rel="stylesheet" href="./jemdoc.css" type="text/css" />

[windowtitle]
# used in header for window title.
<title>|</title>

[doctitle]
# used at top of document.
<div id="toptitle">
<h1>|</h1>

[subtitle]
<div id="subtitle">|</div>

[doctitleend]
</div>

[bodystart]
</head>
<body>

[menustart]
<table summary="Table for page layout.">
<tr valign="top">
<td id="layout-menu">

[menuend]
</td>
<td>
<div id="layout-content">

[menucategory]
<div class="menu-category">|</div>

[menuitem]
<div><a href="|1">|2</a></div>

[currentmenuitem]
<div><a href="|1" class="current">|2</a></div>

[nomenu]
<div id="layout-content">

[menulastbit]
</div>
</td>
</tr>
</table>

[nomenulastbit]
</div>

[bodyend]
</body>
</html>

[infoblock]
<div class="infoblock">

[codeblock]
<div class="codeblock">

[blocktitle]
<div class="blocktitle">|</div>

[infoblockcontent]
<div class="blockcontent">

[codeblockcontent]
<div class="blockcontent"><pre>

[codeblockend]
</pre></div></div>

[infoblockend]
</div></div>

[footerstart]
<div id="footer">
<div id="footer-text">

[footerend]
</div>
</div>

[lastupdated]
Last updated |, using <a href="http://jemdoc.jaboc.net/">jemdoc</a>.
"""
    return defaults

class JandalError(Exception):
    """Error raised when jemdoc is given arguments or input it cannot handle."""

def raisejandal(msg, line=0):
    """Raise a JandalError describing msg.

    When line is non-zero the message is prefixed with "line N: "; with the
    default of 0 the message is used on its own.
    """
    if line != 0:
        raise JandalError("line %d: %s" % (line, msg))
    raise JandalError("%s" % msg)

if len(sys.argv) == 1 or sys.argv[1] in ('--help', '-h'):
    showhelp()
    raise SystemExit
elif len(sys.argv[2:]) % 2 != 0:
    raise JandalError('invalid arguments, try --help')
if sys.argv[1] == '--show-config':
    print standardconf()
    raise SystemExit
else:
    inname = sys.argv[-1]
    outname = re.search(r'(.*)\.', inname).group(1) + '.html'

outoverride = False
confoverride = False
confnames = []
for i in range(1, len(sys.argv) - 1, 2):
    if sys.argv[i] == '-o':
        if outoverride:
            raise RuntimeError("only one output file, please")
        outname = sys.argv[i+1]
        outoverride = True
    elif sys.argv[i] == '-c':
        if confoverride:
            raise RuntimeError("only one config file, please")
        confnames.append(sys.argv[i+1])
        confoverride = True
    else:
        raise RuntimeError('unrecognised argument %s, try --help' % sys.argv[i])

def readnoncomment(f):
    """Return the next line of f that does not start with '#'.

    Comment lines (those whose first character is '#') are skipped.  The
    returned line has its trailing whitespace replaced by a single newline.
    At end of file the empty string is returned.
    """
    l = f.readline()
    # Loop rather than recurse, so that a long run of comment lines cannot
    # exhaust the interpreter's recursion limit.
    # jem: be a little more generous with the comments we accept?
    while l != '' and l[0] == '#':
        l = f.readline()

    if l == '':
        return l
    return l.rstrip() + '\n' # leave just one \n and no spaces etc.

def parseconf(cns):
    """Build the configuration dictionary.

    The built-in defaults from standardconf() are parsed first, then each
    file named in cns in order, so a block defined later replaces an earlier
    block of the same name.

    cns: list of configuration file names.
    Returns a dict mapping each block name (the text inside ``[...]``) to the
    non-comment lines that follow it, up to the next blank line or the end of
    the file.
    """
    syntax = {}

    def readblocks(f):
        # Fold every [tag] block found in f into syntax.
        while pc(f) != '':
            l = readnoncomment(f)
            r = re.match(r'\[(.*)\]\n', l)
            if not r:
                continue

            s = ''
            l = readnoncomment(f)
            while l not in ('\n', ''):
                s += l
                l = readnoncomment(f)

            syntax[r.group(1)] = s

    # The defaults are read through a file-like wrapper so that they take
    # exactly the same path as a real configuration file.
    f = StringIO.StringIO(standardconf())
    try:
        readblocks(f)
    finally:
        f.close()

    # Each override file is closed even if parsing it raises.
    for sname in cns:
        f = open(sname)
        try:
            readblocks(f)
        finally:
            f.close()

    return syntax

def insertmenuitems(mname, current):
    """Write the menu described by the file mname.

    Each non-blank, non-comment line of the file is either ``text [link]``,
    written with the 'menuitem' block (or 'currentmenuitem' when link equals
    current), or plain text, written with the 'menucategory' block.

    mname: name of the menu file.
    current: link of the page being generated.
    """
    f = open(mname)
    try:
        while pc(f) != '':
            l = readnoncomment(f).strip()
            if l == '':
                continue

            r = re.match(r'\s*(.*?)\s*\[(.*)\]', l)

            if r: # then we have a link.
                if r.group(2) == current:
                    kind = 'currentmenuitem'
                else:
                    kind = 'menuitem'
                hb(conf[kind], r.group(2), br(r.group(1)))

            else: # menu category.
                hb(conf['menucategory'], br(l))
    finally:
        # Close the menu file even if writing one of the items fails.
        f.close()


# Open the source for reading and the target for writing.  Both handles are
# module-level globals: pc() and nl() read from infile, out() writes to
# outfile.
infile = open(inname)
outfile = open(outname, 'w')

def out(s):
    """Write the string s to the output file."""
    write = outfile.write
    write(s)

def hb(tag, content1, content2=None):
    """Writes out a halfblock (hb).

    tag is a template.  With one content argument every '|' in tag is
    replaced by content1.  With two, every '|1' is replaced by content1 and
    every '|2' by content2.  The result is passed to out().

    The content is inserted literally: backslashes in it are not treated as
    regular-expression escapes or group references, and text inserted for
    '|1' is not scanned again for '|2'.
    """
    if content2 is None:
        filled = tag.replace('|', content1)
    else:
        fills = {'|1': content1, '|2': content2}
        # A function replacement is used verbatim, and a single pass means
        # already-inserted content is never substituted a second time.
        filled = re.sub(r'\|[12]', lambda m: fills[m.group(0)], tag)

    out(filled)

def pc(f = infile):
    """Peeks at next character in the file.

    Leading spaces and tabs are consumed.  The character returned is left
    unread, so the next read from f sees it again.  Returns '' at end of
    file.
    """
    # Should only be used to look at the first character of a new line.
    c = f.read(1)

    # Skip indentation on the file we were given.  (Recursing with no
    # argument here would continue on the default input file instead of f.)
    while c and c in ' \t':
        c = f.read(1)

    if c: # only undo forward movement if we're not at the end.
        f.seek(-1, 1)

    return c

def nl(withcount=False, codemode=False):
    """Get input file line.

    Reads one line from the input file and increments the global linenum.

    Unless codemode is set, leading spaces and tabs are dropped, a trailing
    comment (an unescaped '#' to the end of the line) is removed, and any
    leading '-', '.', '=' and ':' characters are stripped.

    With withcount set, the line must begin with a run of one repeated
    character followed by a space; the return value is then the tuple
    (line, length of that run).  Otherwise just the line is returned.

    Raises SyntaxError if withcount is set and the line does not have that
    form.
    """
    global linenum
    s = infile.readline()
    linenum += 1
    if not codemode:
        # remove any special characters - assume they were checked by pc() before
        # we got here.
        # remove any trailing comments.
        s = s.lstrip(' \t')
        s = re.sub(r'\s*(?<!\\)#.*', '', s)

    count = None
    if withcount:
        # Count the marker run before it is stripped below.  The marker is
        # escaped so that any character is matched literally; an empty line
        # has no marker at all.
        r = None
        if s:
            r = re.match('(%s+) ' % re.escape(s[0]), s)
        if not r:
            raise SyntaxError("couldn't handle the jandal (code 12039) on line"
                              " %d" % linenum)
        count = len(r.group(1))

    if not codemode:
        s = s.lstrip('-.=:')

    if withcount:
        return (s, count)
    return s

def np(withcount=False):
    """Read the next paragraph from the input file.

    Lines are gathered with nl() until the next line starts with one of the
    paragraph-ending characters below (or the input ends); blank lines that
    follow are then consumed.  The final newline is dropped from the result.

    Returns the paragraph text, or (text, count) when withcount is set, where
    count is the value nl() reports for the first line.
    """
    # A line starting with any of these (or end of file, '') ends the paragraph.
    stoppers = ('\n', '-', '.', ':', '', '=', '#', '~', '{')

    count = None
    if withcount:
        text, count = nl(withcount)
    else:
        text = nl()

    while True:
        if pc() in stoppers:
            break
        text += nl()

    # burn the blank lines that follow.
    while pc() == '\n':
        nl()

    # ditch the trailing \n.
    text = text[:-1]
    if withcount:
        return (text, count)
    return text

def quote(s, dollars=False):
    """Return s with a backslash inserted before each markup character.

    The characters escaped are backslash * / + " ' < > . ~ [ ] and -, and
    additionally $ when dollars is true.
    """
    if dollars:
        specials = r"""[\\*/+"'<>$\.~[\]-]"""
    else:
        specials = r"""[\\*/+"'<>\.~[\]-]"""

    return re.sub(specials, r'\\\g<0>', s)

def replacequoted(b):
    """Backslash-quote the contents of {{raw html}} and $...$ sections.

    Each {{...}} section is replaced by its contents passed through
    quote(..., True).  Each $...$ section (with unescaped dollar signs) is
    replaced by its contents passed through quote() and wrapped in '+'
    characters.  Returns the rewritten text.
    """
    def rewrite(pattern, text, render):
        # Replace matches one at a time, searching again from the start of
        # each replacement.
        hit = pattern.search(text)
        while hit:
            text = text[:hit.start()] + render(hit.group(1)) + text[hit.end():]
            hit = pattern.search(text, hit.start())
        return text

    rawhtml = re.compile(r'\{\{(.*?)\}\}', re.M + re.S)
    b = rewrite(rawhtml, b, lambda inner: quote(inner, True))

    # likewise replace $sections$ as +{{sections}}+.
    dollars = re.compile(r'(?<!\\)\$(.*?)(?<!\\)\$', re.M + re.S)
    b = rewrite(dollars, b, lambda inner: '+' + quote(inner) + '+')

    return b

def replacelinks(b):
    """Turn [target label] markup in b into quoted <a href> markup.

    The target is everything up to the first whitespace; the rest, if any, is
    the label.  A target containing '@' gets a 'mailto:' prefix unless it
    already has one.  Without a label, the quoted target (minus any leading
    'mailto:') is used as the label.
    """
    # works with [link.html new link style].
    linkre = re.compile(r'(?<!\\)\[(.*?)(?:\s(.*?))?(?<!\\)\]', re.M + re.S)

    found = linkre.search(b)
    while found:
        target = found.group(1).strip()
        if '@' in target and not target.startswith('mailto:'):
            target = 'mailto:' + target
        target = quote(target)

        label = found.group(2)
        if label:
            label = label.strip()
        else:
            # remove any mailto before labelling.
            label = re.sub('^mailto:', '', target)

        anchor = r'<a href=\"%s\">%s<\/a>' % (target, label)
        b = b[:found.start()] + anchor + b[found.end():]

        # carry on from where the replacement begins.
        found = linkre.search(b, found.start())

    return b

def br(b):
    """Does simple text replacements on a block of text. ('block replacements')

    Converts the inline markup in b to html and returns the result.  The
    steps run in a fixed order; a character preceded by a backslash is left
    alone by the markup rules, and the quoting backslashes are removed at the
    end.
    """
    flags = re.M + re.S

    # (pattern, replacement) pairs, applied strictly in this order.
    # /italics/ comes first because the '/' in other tags would otherwise
    # interfere.
    rules = (
        (r'(?<!\\)/(.*?)(?<!\\)/', r'<i>\1</i>'),           # /italics/
        (r'(?<!\\)\*(.*?)(?<!\\)\*', r'<b>\1</b>'),         # *bold*
        (r'(?<!\\)\+(.*?)(?<!\\)\+', r'<tt>\1</tt>'),       # +monospace+
        (r'(?<!\\)"(.*?)(?<!\\)"', r'&ldquo;\1&rdquo;'),    # "double quotes"
        (r"(?<!\\)`", r'&lsquo;'),                          # left quote `
        (r"(?<!\\)'", r'&rsquo;'),                          # apostrophe '
        (r"(?<!\\)---", r'&mdash;'),                        # em dash ---
        (r"(?<!\\)--", r'&ndash;'),                         # en dash --
        (r"(?<!\\)\.\.\.", r'&hellip;'),                    # ellipsis ...
        (r"(?<!\\)~", r'&nbsp;'),                           # non-breaking space ~
        (r"(?<!\\)\\R", r'&reg;'),                          # registered trademark \R
        (r"(?<!\\)\\C", r'&copy;'),                         # copyright \C
        (r"(?<!\\)\\n", r'<br />'),                         # line break \n
    )

    # Park literal (doubled) backslashes behind a placeholder.
    b = re.sub(r'\\\\', '&jemLITerl33talBS;', b)

    # Deal with {{html embedding}}.
    b = replacequoted(b)

    b = allreplace(b)

    # Remove leading spaces, tabs, dashes, dots, then do the URL thing.
    b = b.lstrip('-. \t')
    b = replacelinks(b)

    for pattern, replacement in rules:
        b = re.sub(re.compile(pattern, flags), replacement, b)

    # Second to last, remove any remaining quoting backslashes.
    b = re.sub(r'\\(?!\\)', '', b)

    # Bring the literal backslashes back.
    b = re.sub('&jemLITerl33talBS;', r'\\', b)

    return b

def allreplace(b):
    """Replacements that should be done on everything.

    Turns each '>' into '&gt;' and each '<' into '&lt;', except where the
    character is preceded by a backslash.
    """
    entities = (
        (r"(?<!\\)>", r'&gt;'),
        (r"(?<!\\)<", r'&lt;'),
    )
    for pattern, entity in entities:
        b = re.compile(pattern, re.M + re.S).sub(entity, b)

    return b

def pyint(l):
    """Write one line of a 'pyint' code block.

    Trailing whitespace is removed.  A line starting with '>>>' is wrapped in
    a "pycommand" span and a line starting with '#' in a "comment" span; any
    other line is written as it is.  '<' and '>' are replaced via
    allreplace() and a newline is appended in every case.
    """
    line = l.rstrip()

    wrapper = None
    if line.startswith('>>>'):
        wrapper = '<span class="pycommand">|</span>\n'
    elif line.startswith('#'): # jem upgrade this to handle not at the beginning.
        wrapper = '<span class="comment">|</span>\n'

    if wrapper is None:
        out(allreplace(line) + '\n')
    else:
        hb(wrapper, allreplace(line))

def py(l):
    """Write one line of a 'py' code block.

    Trailing whitespace is removed.  Lines starting with '>>>' go in a
    "pycommand" span, lines starting with '#' in a "comment" span, and
    everything else is written unwrapped.  '<' and '>' are replaced via
    allreplace() and each line ends with a newline.
    """
    # jem need to do much better here.
    stripped = l.rstrip()
    escaped = allreplace(stripped)

    if stripped.startswith('>>>'):
        hb('<span class="pycommand">|</span>\n', escaped)
        return

    if stripped.startswith('#'):
        hb('<span class="comment">|</span>\n', escaped)
        return

    out(escaped + '\n')

def dashlist():
    """Write an unordered list from consecutive '-' paragraphs.

    The count reported by np(True) for each paragraph (the length of its
    run of dashes) is used as the nesting depth.  Nested <ul> elements are
    opened and closed as the depth changes, and every list still open is
    closed when the run of '-' paragraphs ends.
    """
    level = 0

    while pc() == '-':
        (s, newlevel) = np(True)

        # first adjust list number as appropriate.
        if newlevel > level:
            for i in range(newlevel - level):
                if newlevel > 1:
                    out('\n')
                out('<ul>\n<li>')
        elif newlevel < level:
            # Close one nested list per level dropped, then start a single
            # new item.  Starting an item for every level dropped would leave
            # empty <li></li> elements behind when dropping several levels.
            for i in range(level - newlevel):
                out('</li>\n</ul>\n')
            out('</li><li>')
        else:
            out('</li>\n<li>')

        out(br(s))
        level = newlevel

    for i in range(level):
        out('</li>\n</ul>\n')

def dotlist():
    """Write an ordered list from consecutive '.' paragraphs.

    The count reported by np(True) for each paragraph (the length of its
    run of dots) is used as the nesting depth.  Nested <ol> elements are
    opened and closed as the depth changes, and every list still open is
    closed when the run of '.' paragraphs ends.
    """
    level = 0

    while pc() == '.':
        (s, newlevel) = np(True)

        # first adjust list number as appropriate.
        if newlevel > level:
            for i in range(newlevel - level):
                if newlevel > 1:
                    out('\n')
                out('<ol>\n<li>')
        elif newlevel < level:
            # Close one nested list per level dropped, then start a single
            # new item.  Starting an item for every level dropped would leave
            # empty <li></li> elements behind when dropping several levels.
            for i in range(level - newlevel):
                out('</li>\n</ol>\n')
            out('</li><li>')
        else:
            out('</li>\n<li>')

        out(br(s))
        level = newlevel

    for i in range(level):
        out('</li>\n</ol>\n')

def colonlist():
    """Write a definition list from consecutive ':' paragraphs.

    Each paragraph must look like ``{term} description``.  The term is
    written as a <dt> and the remainder as a <dd>, both after br().

    Raises SyntaxError if a paragraph does not have that form.
    """
    entry = re.compile(r'\s*{(.*?)(?<!\\)}(.*)', re.M + re.S)

    out('<dl>\n')
    while pc() == ':':
        found = re.match(entry, np())

        if not found or len(found.groups()) != 2:
            raise SyntaxError("couldn't handle the jandal (invalid deflist "
                             "format) on line %d" % linenum)

        # split into definition / non-definition part.
        term, description = found.group(1), found.group(2)

        hb('<dt>|</dt>\n', br(term))
        hb('<dd>|</dd>\n', br(description))

    out('</dl>\n')

def codeblock():
    """Write a code block, reading its lines from the input file.

    Uses the module-level list g: g[0] is the block title (may be empty) and
    g[1] names the highlighting, which must be '', 'pyint' or 'py'.  Lines
    are read in code mode until end of file or a line starting with '~'.

    Raises SyntaxError for any other highlighting name.
    """
    out(conf['codeblock'])
    title = g[0]
    if title:
        hb(conf['blocktitle'], title)
    out(conf['codeblockcontent'])

    language = g[1]
    if language not in ('', 'pyint', 'py'):
        raise SyntaxError( \
          "couldn't handle the jandal: unrecognised syntax "
          "highlighting on line %d" % linenum)

    # Pick the per-line writer once; None means plain output.
    writers = {'pyint': pyint, 'py': py}
    writer = writers.get(language)

    # Now we are handling code.
    # Handle \~ and ~ differently.
    while True: # wait for EOF.
        l = nl(codemode=True)
        if not l or l.startswith('~'):
            break

        # \~ and \{ at the start of a line stand for a literal ~ or {.
        if l.startswith('\\~') or l.startswith('\\{'):
            l = l[1:]

        if writer:
            writer(l)
        else:
            out(allreplace(l))

    out(conf['codeblockend'])

# Build the configuration (defaults plus any -c file) and write the fixed
# opening markup.
conf = parseconf(confnames)
out(conf['firstbit'])

# Count of input lines consumed so far.
linenum = 0

# An optional first line "# jemdoc: option, option" sets page options:
#   menu{MENUFILE}{CURRENT}  adds a side menu,
#   nodate                   drops the footer.
menu = None
footer = True
if pc() == '#':
    l = infile.readline()
    linenum += 1
    directive = '# jemdoc: '
    if l.startswith(directive):
        l = l[len(directive):]
        # jem only handle one argument for now.
        for b in l.split(','):
            b = b.strip()
            if b.startswith('nodate'):
                footer = False
            elif b.startswith('menu'):
                # Not read anywhere else in this file; kept as it was.
                sidemenu = True
                # The braces are looked for in the whole directive, not just
                # in this comma-separated piece.
                r = re.compile(r'(?<!\\){(.*?)(?<!\\)}', re.M + re.S)
                g = re.findall(r, l)
                if len(g) != 2:
                    raise SyntaxError('sidemenu error on line %d' % linenum)

                menu = tuple(g)

# Look for a title.  The window title belongs inside <head>, so it is written
# before the body is started.
if pc() == '=': # don't check exact number of '=' here jem.
    t = br(nl())[:-1]
    hb(conf['windowtitle'], t)
else:
    t = None

out(conf['bodystart'])

# Page layout: either the side menu followed by the content cell, or just the
# content wrapper.
if not menu:
    out(conf['nomenu'])
else:
    out(conf['menustart'])
    insertmenuitems(menu[0], menu[1])
    out(conf['menuend'])

# The visible title, with an optional subtitle taken from the paragraph that
# directly follows the title line.
if t is not None:
    hb(conf['doctitle'], t)

    if pc() != '\n':
        hb(conf['subtitle'], br(np()))

    hb(conf['doctitleend'], t)

# Main loop: look at the first character of the next line to decide what kind
# of element starts there, and hand over to the matching writer.
infoblock = False
while 1: # wait for EOF.
    p = pc()

    if p == '':
        break

    # look for lists.
    elif p == '-':
        dashlist()

    elif p == '.':
        dotlist()

    elif p == ':':
        colonlist()

    # look for titles.
    elif p == '=':
        # the number of '=' characters gives the heading level.
        (s, c) = nl(True)
        # trim trailing \n.
        s = s[:-1]
        hb('<h%d>|</h%d>\n' % (c, c), br(s))

    # look for comments.
    elif p == '#':
        nl()

    elif p == '\n':
        nl()

    # look for blocks.
    elif p == '~':
        nl()
        if infoblock:
            # this '~' line closes the info block that is open.
            out(conf['infoblockend'])
            infoblock = False
            # NOTE(review): this second nl() discards the line after the
            # closing '~' line whatever it contains - confirm intended.
            nl()
            continue
        else:
            # an optional {title}{highlighting} line follows the opening '~'.
            # g is module-level because codeblock() reads it.
            if pc() == '{':
                l = br(nl())
                r = re.compile(r'(?<!\\){(.*?)(?<!\\)}', re.M + re.S)
                g = re.findall(r, l)
            else:
                g = []

            if len(g) in (0, 1): # info block.
                out(conf['infoblock'])
                infoblock = True

                if len(g) == 1: # info block with a title.
                    hb(conf['blocktitle'], g[0])

                out(conf['infoblockcontent'])

            elif len(g) == 2:
                codeblock()

            else:
                # go through raisejandal() so that the line number is
                # formatted into the message.
                raisejandal("couldn't handle block", linenum)

    else:
        s = br(np())
        if s:
            hb('<p>|</p>\n', s)

# Footer with the time of generation, unless 'nodate' was requested.
if footer:
    # Spelled out with standard directives: '%F' and '%R' are shorthands for
    # '%Y-%m-%d' and '%H:%M' that not every platform's strftime supports.
    s = time.strftime('%Y-%m-%d %H:%M:%S %Z', time.localtime())
    out(conf['footerstart'])
    hb(conf['lastupdated'], s)
    out(conf['footerend'])

# Close whichever layout was opened before the content.
if menu:
    out(conf['menulastbit'])
else:
    out(conf['nomenulastbit'])

out(conf['bodyend'])

# The source has been read to the end; release it as well as the output.
infile.close()

if outfile is not sys.stdout:
    outfile.close()
