#!/usr/bin/env python3
"""Generate one HTML page per ``.gls`` article file.

Pipeline, as implemented below:

1. ``first_pass`` parses each ``.gls`` file into a ``GlsFile`` (names,
   paragraph/quote blocks, optional "see also" list).
2. ``Indexes`` maps slugs and names to files and builds one
   case-insensitive pattern per name.
3. ``gen_inner_html`` sends block text through the external ``markup``
   helper (``Markup``), links mentions of other articles' names, and
   renders the article body.
4. ``main`` interpolates the body into a template and writes
   ``OUTDIR/<slug>.html``.

NOTE(review): the copy of this file that was reviewed had the HTML markup
inside its string literals stripped out (and all indentation collapsed).
Every tag emitted below is therefore a reconstruction using plain,
conventional elements -- confirm each one against the upstream source.
"""
import html
import re
from datetime import datetime
from os import stat, system
from glob import glob
from typing import List, Optional, Union, Dict, Set, Tuple
from dataclasses import dataclass
from sys import argv, stderr, exit
from pathlib import Path
from subprocess import Popen, PIPE, run as runcmd

# The leading newline keeps the literal readable; ``[1:]`` removes it again.
usage = f'''
usage: {argv[0]} SRCDIR OUTDIR

For each .gls file in SRCDIR, creates a matching .html file in OUTDIR.
If SRCDIR/template.html exists, it will be used.

Templates are interpolated as in str.format, and the following keys
are provided:
    {{title}}   - The first name for an article.
    {{slug}}    - The name of the output file, minus '.html'.
    {{body}}    - The HTML content of an article.
    {{modtime}} - The datetime of the article's last edit.
'''[1:]

# Template used when SRCDIR/template.html does not exist.
# NOTE(review): reconstructed -- only ``{title}`` and ``{body}`` survived in
# the reviewed copy.
DEFAULT_TEMPLATE = '''<!DOCTYPE html>
<title>{title}</title>
{body}
'''


def die(why, code=1):
    """Print ``why`` to stderr and terminate the process with ``code``."""
    print(why, file=stderr)
    exit(code)


### TYPES ###

@dataclass
class Block:
    """One paragraph or quote of an article."""
    # 'para' or 'quote'.
    ty: str
    # Raw text after parsing; gen_inner_html replaces it with the segment
    # list returned by Markup.process.
    text: Union[str, list]
    # Content of the block's '~' line (attribution for quotes); '' if none.
    meta: str


@dataclass
class GlsFile:
    """Parsed contents of one ``.gls`` file."""
    # Source file name without the '.gls' suffix.
    slug: str
    # First name listed in the file.
    title: str
    # Every name listed in the header section, in file order, no duplicates.
    names: List[str]
    blocks: List[Block]
    # Non-empty lines following the '***' marker, or None if there is none.
    see_also: Optional[List[str]]


### PARSING ###

def readlines(fp):
    """Return a one-shot iterator over the stripped lines of ``fp``.

    Lines that start with ';' after stripping are dropped.  The result is
    deliberately lazy and single-pass: ``first_pass`` reads the header from
    it and then continues with the body from where the header stopped.
    """
    stripped = (raw.strip() for raw in fp.readlines())
    return (line for line in stripped if not line.startswith(';'))


def first_pass(slug, fp):
    """Parse an open ``.gls`` file into a ``GlsFile``.

    The header is one name per line and ends at the first blank line after
    at least one name.  The body is a sequence of blank-line-separated
    blocks: lines starting with '>' begin a quote, '~' lines set the
    block's meta text, anything else begins a paragraph.  A line starting
    with '***' outside a block begins the "see also" list.

    Returns ``None`` (after a message on stderr) if the file has no names.
    """
    lines = readlines(fp)

    # A dict keeps first-seen order while dropping duplicates, so the
    # result is deterministic (a set would iterate in hash order).
    names = {}
    for name in lines:
        if not name:
            if names:
                break  # section end
            # Leading blank lines used to become an empty title/name.
            continue
        names[name] = None

    if not names:
        print(f'Empty file: {slug}.gls', file=stderr)
        return None
    title = next(iter(names))

    do_see_also = False
    block_type = None
    block_text = None
    block_meta = ''
    blocks = []
    for line in lines:
        if not line and block_type is not None:
            # block end
            blocks.append(Block(block_type, block_text, block_meta))
            block_type = None
            block_text = None
            block_meta = ''
            continue
        if line.startswith('***') and block_type is None:
            # see also section; the remaining lines are collected below
            do_see_also = True
            break
        if line.startswith('~'):
            block_meta = line[1:].strip()
        elif block_type is None:
            if not line:
                continue
            if line.startswith('>'):
                block_type = 'quote'
                block_text = line[1:].strip()
                continue
            # A leading backslash escapes a marker ('>', '~', '***') or
            # stands alone; drop it and treat the rest as paragraph text.
            if line.startswith(("\\>", "\\~", "\\***")) or line == "\\":
                line = line[1:]
            block_type = 'para'
            block_text = line
        elif block_type == 'quote':
            if line.startswith('>'):
                line = line[1:].strip()
            block_text += ' ' + line
        elif block_type == 'para':
            block_text += ' ' + line

    see_also = None
    if do_see_also:
        see_also = [line for line in lines if line]
    elif block_type is not None:
        # File ended in the middle of a block.
        blocks.append(Block(block_type, block_text, block_meta))

    return GlsFile(slug, title, list(names), blocks, see_also)


class Markup:
    """Wrapper around the external ``markup`` helper program.

    The helper is built from ``cfile`` with ``cc`` when the binary is
    missing or older than its source, then started once as
    ``<bfile> convert`` and fed one text per ``process`` call.
    """

    def __init__(self, where, *, cfile='markup.c', bfile='markup'):
        """Build the helper in directory ``where`` if stale, then start it.

        Raises ``FileNotFoundError`` if the C source is missing and
        ``subprocess.CalledProcessError`` if compilation fails.
        """
        self.cfile = str(Path(where, cfile))
        self.bfile = str(Path(where, bfile))

        src_mtime = stat(self.cfile).st_mtime
        try:
            bin_mtime = stat(self.bfile).st_mtime
        except FileNotFoundError:
            # First run: nothing has been compiled yet.
            bin_mtime = None

        if bin_mtime is None or src_mtime > bin_mtime:
            print("recompiling markup subsystem...")
            runcmd(['cc', self.cfile, '-o', self.bfile, '-Wall'], check=True)
            print("recompiled!")

        self.proc = Popen([self.bfile, 'convert'],
                          stdin=PIPE, stdout=PIPE, text=True)

    def process(self, text):
        """Send ``text`` to the helper and return its reply as segments.

        Reply lines are read until ``NEXT`` or end of stream.  For each
        line, characters 0-3 are a type tag, 5-8 a decimal length, and the
        payload starts at column 12.  Segments are returned as:

        * ``str`` for ``TEXT`` -- plain text, escaped later at render time;
        * 1-tuple for ``HTML`` -- payload used verbatim as markup;
        * 1-tuple for ``IESC`` -- payload HTML-escaped here.

        Lines with any other tag are reported on stderr and skipped.
        """
        self.proc.stdin.write(text + "\n")
        self.proc.stdin.flush()

        segments = []
        while (line := self.proc.stdout.readline()) not in ('NEXT\n', ''):
            ty = line[0:4]
            length = int(line[5:9])
            ltext = line[12:12 + length]
            if ty == 'HTML':
                segments.append((ltext,))
            elif ty == 'IESC':
                segments.append((html.escape(ltext),))
            elif ty == 'TEXT':
                segments.append(ltext)
            else:
                print(f'read in unknown type "{ty}" from markup subprocess',
                      file=stderr)
        return segments

    def close(self):
        """Close the helper's pipes and reap the process."""
        if self.proc.stdin is not None:
            self.proc.stdin.close()
        if self.proc.stdout is not None:
            self.proc.stdout.close()
        self.proc.terminate()
        self.proc.wait()


# Splits quote meta text of the form ``source @@ date // url``; the
# ``@@ date`` and ``// url`` parts are each optional.
quote_pat = re.compile("((?:(?!@@)(?!//).)*)(?:@@((?:(?!//).)+))?(?://(.+))?")


### GENERATION ###

def link_repl(mat):
    """Return anchor markup for a match whose groups are ``(url, name)``.

    Both groups are stripped and HTML-escaped.  When ``name`` is missing or
    empty, the URL itself, in angle brackets, is used as the link text.

    NOTE(review): nothing in this file calls this function, and the anchor
    markup is reconstructed -- confirm against upstream.
    """
    url, name = (
        g if g is None else html.escape(g.strip()) for g in mat.groups()
    )
    if name:
        return f'<a href="{url}">{name}</a>'
    return f'<a href="{url}">&lt;{url}&gt;</a>'


@dataclass(init=False)
class Indexes:
    """Lookup tables over all parsed files."""
    by_slug: Dict[str, GlsFile]
    by_name: Dict[str, GlsFile]
    # (name, pattern) pairs; each pattern matches the name
    # case-insensitively where it is not adjacent to word characters.
    names_sorted: List[Tuple[str, re.Pattern]]

    def __init__(self, files):
        """Index ``files``; exits via ``die`` if two files define a name."""
        self.by_slug = {}
        self.by_name = {}
        self.names_sorted = []
        for file in files:
            self.by_slug[file.slug] = file
            for name in file.names:
                if name in self.by_name:
                    die(f'Redefinition of "{name}": occurs in {self.by_name[name].slug}.gls and {file.slug}.gls')
                self.by_name[name] = file
                pattern = re.compile(f"((?<=\\W)|^){re.escape(name)}((?=\\W)|$)", re.IGNORECASE)
                self.names_sorted.append((name, pattern))
        # sorted() returns a new list, which the original discarded; sort
        # in place so the order actually takes effect.
        # NOTE(review): ascending length is what the original key asked
        # for; longest-first may have been intended so that longer names
        # are linked before names contained in them -- confirm.
        self.names_sorted.sort(key=lambda pair: len(pair[0]))


def _link_names(file, idx):
    """Turn mentions of other files' names in ``file``'s blocks into links.

    For each name and each block, only the first plain-text segment that
    matches is split, so a name is linked at most once per block.
    """
    for name, pat in idx.names_sorted:
        if name in file.names:
            # Never link an article to itself.
            continue
        target = html.escape(idx.by_name[name].slug)
        for block in file.blocks:
            for i, seg in enumerate(block.text):
                if not isinstance(seg, str):
                    # Tuple segments are finished markup; leave them alone.
                    continue
                m = pat.search(seg)
                if m is None:
                    continue
                s, e = m.span()
                # The link is a raw-markup segment, so the matched text
                # must be escaped here; it will not be escaped later.
                # NOTE(review): anchor markup and href are reconstructed.
                link = f'<a href="{target}.html">{html.escape(seg[s:e])}</a>'
                block.text[i:i + 1] = [seg[:s], (link,), seg[e:]]
                break  # list was modified; stop scanning this block


def _render_quote(text, meta):
    """Render one quote block; ``meta`` is parsed with ``quote_pat``.

    NOTE(review): all tags here are reconstructed -- confirm upstream.
    """
    if meta:
        sauce, date, url = (
            g if g is None else html.escape(g.strip())
            for g in quote_pat.match(meta).groups()
        )
    else:
        sauce = date = url = None

    out = ["\n<blockquote>\n\t<p>", text, "</p>"]
    if sauce is not None:
        cite = sauce if url is None else f'<a href="{url}">{sauce}</a>'
        out.append(f"\n\t<footer><cite>{cite}</cite>")
        if date is not None:
            out.append(f', {date}')
        out.append('</footer>')
    out.append('\n</blockquote>')
    return ''.join(out)


def gen_inner_html(fmt, file, idx):
    """Return the HTML body for ``file``.

    ``fmt`` must provide ``process(text)`` returning a segment list as
    described on ``Markup.process``.  Each block's ``text`` is replaced in
    place by its segment list, so this must be called once per file.

    NOTE(review): the heading, paragraph and "See Also" tags are
    reconstructed.  As in the reviewed copy, only the "See Also" heading
    is emitted; the entries of ``file.see_also`` are not rendered.
    """
    for block in file.blocks:
        # format text, listify it
        block.text = fmt.process(block.text)

    _link_names(file, idx)

    parts = [f"<h1>{html.escape(file.title)}</h1>"]
    for block in file.blocks:
        # Plain strings are escaped now; tuples already hold markup.
        text = ''.join(
            html.escape(seg) if isinstance(seg, str) else seg[0]
            for seg in block.text
        )
        if block.ty == 'para':
            parts.append(f"\n<p>{text}</p>")
        elif block.ty == 'quote':
            parts.append(_render_quote(text, block.meta))

    if file.see_also:
        parts.append("\n<hr>\n<h2>See Also:</h2>\n")
    return ''.join(parts)


### ENTRYPOINT ###

def main():
    """Command-line entry point: ``SRCDIR OUTDIR``."""
    args = argv[1:]
    if len(args) != 2 or any(arg.startswith('-') for arg in args):
        die(usage)
    srcdir, outdir = args

    try:
        with open(f'{srcdir}/template.html', 'rt') as fp:
            template = fp.read()
    except FileNotFoundError:
        # Only a missing template selects the default; other I/O errors
        # are allowed to surface.
        template = DEFAULT_TEMPLATE

    # Sorted so that output and redefinition errors do not depend on
    # directory listing order.
    files = []
    for fn in sorted(glob('*.gls', root_dir=srcdir)):
        with open(f'{srcdir}/{fn}', 'rt') as fp:
            data = first_pass(fn[:-4], fp)
        if data is not None:
            files.append(data)

    indexes = Indexes(files)

    fmt = Markup(Path(__file__).resolve().parent)
    try:
        for file in files:
            # Render before opening the output so a failure does not leave
            # an empty .html file behind.
            ctx = {
                'title': html.escape(file.title),
                'slug': html.escape(file.slug),
                'body': gen_inner_html(fmt, file, indexes),
                'modtime': datetime.fromtimestamp(
                    stat(f'{srcdir}/{file.slug}.gls').st_mtime
                ),
            }
            with open(f'{outdir}/{file.slug}.html', 'wt') as fp:
                fp.write(template.format(**ctx))
    finally:
        fmt.close()


if __name__ == '__main__':
    main()