From ed42056faa25b1a8d12605ddb112e3c90fa91a28 Mon Sep 17 00:00:00 2001 From: Aleteoryx Date: Fri, 15 Aug 2025 00:22:06 -0400 Subject: [PATCH] blam --- gloss.py | 240 +++++++++++++++++++++++++++++++++++++++++++++++++ testout/a.html | 23 +++++ testout/b.html | 19 ++++ testsrc/a.gls | 13 +++ testsrc/b.gls | 6 ++ 5 files changed, 301 insertions(+) create mode 100755 gloss.py create mode 100644 testout/a.html create mode 100644 testout/b.html create mode 100644 testsrc/a.gls create mode 100644 testsrc/b.gls diff --git a/gloss.py b/gloss.py new file mode 100755 index 0000000000000000000000000000000000000000..7708f1b7dd3658792ec68a91dab39a8ad0f5fb1b --- /dev/null +++ b/gloss.py @@ -0,0 +1,240 @@ +#!/bin/env python3 + +import html +import re +from datetime import datetime +from os import stat +from glob import glob +from typing import List, Optional, Union, Dict, Set, Tuple +from dataclasses import dataclass +from sys import argv, stderr, exit + +usage = f''' +usage: {argv[0]} + +For each .gls file in SRCDIR, creates a matching .html file in OUTDIR. + +If SRCDIR/template.html exists, it will be used. Templates are +interpolated as in str.format, and the following keys are provided: + + {{title}} - The first name for an article. + {{slug}} - The name of the output file, minus '.html'. + {{body}} - The HTML content of an article. + {{modtime}} - The datetime of the article's last edit. 
def die(why, code=1):
    """Print *why* to stderr and terminate the process with status *code*."""
    print(why, file=stderr)
    exit(code)


### TYPES ###

@dataclass
class Block:
    """One unit of article content, as produced by first_pass()."""
    # Block kind: 'para' or 'quote'.
    ty: str
    # Segments of already HTML-escaped text.  first_pass() stores a single
    # str segment; gen_inner_html() splits segments and wraps the pieces it
    # must not search again in 1-tuples.
    text: List[Union[str, Tuple[str]]]
    # Content of the block's '~' line with the '~' removed, or '' if the
    # block has none.
    meta: str


@dataclass
class GlsFile:
    """Parsed form of one .gls source file."""
    # Identifier the file was parsed under (first_pass()'s `slug` argument).
    slug: str
    # Article names in source order, without duplicates; names[0] is the
    # one used as the title.
    names: List[str]
    blocks: List[Block]
    # Non-empty lines following the '***' marker, or None when the file has
    # no such section.
    see_also: Optional[List[str]]


### PARSING ###

def readlines(fp):
    """Return an iterator over the lines of *fp*, stripped of surrounding
    whitespace, skipping comment lines (those starting with ';').

    The result is single-pass: first_pass() relies on consuming it across
    several consecutive loops.
    """
    return (
        line
        for line in map(str.strip, fp.readlines())
        if not line.startswith(';')
    )


def first_pass(slug, fp):
    """Parse one .gls file into a GlsFile.

    Layout: a header of names (one per line) ended by a blank line, then
    blocks separated by blank lines, then optionally a '***' line followed
    by "see also" entries.

    Within the body, a line starting with '>' opens a quote block, any other
    line opens a paragraph (a leading backslash is dropped, so it can escape
    a special first character), and a '~' line sets the current block's meta.

    Returns None (after printing a notice to stderr) when the file contains
    no names.
    """
    lines = readlines(fp)

    # A list, not a set: the first name is the article title, so source
    # order has to survive.
    names = []
    for name in lines:
        if not name:
            if names:  # blank line after at least one name: header done
                break
            continue   # blank lines before the first name are not names
        if name not in names:
            names.append(name)

    if not names:
        print(f'Empty file: {slug}.gls', file=stderr)
        return None

    do_see_also = False
    block_type = None
    block_text = None
    block_meta = ''
    blocks = []
    for line in lines:
        if not line and block_type is not None:  # blank line ends a block
            blocks.append(Block(block_type, [html.escape(block_text)], block_meta))
            block_type = None
            block_text = None
            block_meta = ''
            continue
        if line.startswith('***') and block_type is None:  # see also section
            do_see_also = True
            break

        if line.startswith('~'):
            block_meta = line[1:].strip()
        elif block_type is None:
            if not line:
                continue
            if line.startswith('>'):
                block_type = 'quote'
                block_text = line[1:].strip()
                continue

            if line.startswith('\\'):
                line = line[1:]
            block_type = 'para'
            block_text = line
        elif block_type == 'quote':
            # Quote lines keep their line breaks; the '>' is optional on
            # continuation lines.
            if line.startswith('>'):
                line = line[1:].strip()
            block_text += '\n' + line
        elif block_type == 'para':
            # Paragraph lines are folded into one space-separated string.
            block_text += ' ' + line

    # The input may end while a block is still open (no trailing blank line
    # and no '***'); emit it instead of dropping it.
    if block_type is not None:
        blocks.append(Block(block_type, [html.escape(block_text)], block_meta))

    see_also = None
    if do_see_also:
        see_also = [line for line in lines if line]

    return GlsFile(slug, names, blocks, see_also)


# Splits a quote's meta line into three groups: the source, an optional
# date introduced by '@@', and an optional URL introduced by '//'.
quote_pat = re.compile("((?:(?!@@)(?!//).)+)(?:@@((?:(?!//).)+))?(?://(.+))?")


### GENERATION ###

@dataclass(init=False)
class Indexes:
    """Lookup tables over every parsed file.

    by_slug maps a slug to its file and by_name maps each article name to
    its file.  names_sorted pairs every name with a compiled,
    case-insensitive whole-word pattern, ordered longest name first.

    Construction calls die() if two files define the same name.
    """
    by_slug: Dict[str, GlsFile]
    by_name: Dict[str, GlsFile]
    names_sorted: List[Tuple[str, re.Pattern]]

    def __init__(self, files):
        self.by_slug = {}
        self.by_name = {}
        self.names_sorted = []

        for file in files:
            self.by_slug[file.slug] = file
            for name in file.names:
                if name in self.by_name:
                    die(f'Redefinition of "{name}": occurs in '
                        f'{self.by_name[name].slug}.gls and {file.slug}.gls')
                self.by_name[name] = file

                # Block text is stored HTML-escaped, so the name has to be
                # escaped the same way or names containing &, <, > or quotes
                # could never match.
                needle = re.escape(html.escape(name))
                pattern = re.compile(f"((?<=\\W)|^){needle}((?=\\W)|$)", re.IGNORECASE)
                self.names_sorted.append((name, pattern))

        # Sort in place (sorted() returns a new list, which would be thrown
        # away).  Longest first, so a long name is tried before any shorter
        # name contained in it; names of equal length keep insertion order.
        self.names_sorted.sort(key=lambda x: len(x[0]), reverse=True)

{html.escape(file.names[0])}

" + for block in file.blocks: # ok generate the html + text = ''.join(map(lambda x: x if type(x) == str else x[0], block.text)) + if block.ty == 'para': + content += f"\n

{text}

" + elif block.ty == 'quote': + sauce, date, url = map( + lambda x: x if x is None else html.escape(x), + quote_pat.match(block.meta).groups() ) + + content += "\n
\n\t' + content += text + content += '' + + if sauce is not None: + content += "\n\t

" + (sauce if url is None else f'{sauce}') + "" + if date is not None: + content += f', {date}' + content += '

' + content +='\n
' + + if file.see_also is not None and len(file.see_also) != 0: + content += "\n
\n

See Also:

\n
    " + for slug in file.see_also: + name = idx.by_slug[slug].names[0] + content += f"\n\t
  • {html.escape(name)}
  • " + content += "\n" + + return content + + +### ENTRYPOINT ### + +if __name__ == '__main__': + argv = argv[1:] + if len(argv) != 2 or argv[0][0] == '-' or argv[1][0] == '-': + die(usage) + + srcdir = argv[0] + outdir = argv[1] + + try: + with open('{srcdir}/template.html', 'rt') as fp: + template = fp.read() + except Exception: + template = \ +''' + + + + {title} + + +
    +{body} +
    +
    File last modified {modtime:%a, %Y-%d-%M %H:%M:%S %Z}
    + + +''' + + files = [] + for fn in glob('*.gls', root_dir=srcdir): + with open(f'{srcdir}/{fn}', 'rt') as fp: + data = first_pass(fn[:-4], fp) + if data is not None: + files.append(data) + + indexes = Indexes(files) + for file in files: + with open(f'{outdir}/{file.slug}.html', 'wt') as fp: + ctx = { + 'title': html.escape(file.names[0]), + 'slug': html.escape(file.slug), + 'body': gen_inner_html(file, indexes), + 'modtime': datetime.fromtimestamp(stat(f'{srcdir}/{file.slug}.gls').st_mtime) + } + fp.write(template.format(**ctx)) diff --git a/testout/a.html b/testout/a.html new file mode 100644 index 0000000000000000000000000000000000000000..688ecfccb13ff1b5e19a02d76dd6c21e8f30d62c --- /dev/null +++ b/testout/a.html @@ -0,0 +1,23 @@ + + + + + The Letter A + + +
    +

    The Letter A

    +

    this is a paragraph block. these lines will be folded into one string and ultimately rendered roughly the same in the browser.

    +
    +
    this is a quote block
    +

    aleteoryx

    +
    +
    +

    See Also:

    +
    +
    File last modified Thu, 2025-14-40 18:40:06
    + + diff --git a/testout/b.html b/testout/b.html new file mode 100644 index 0000000000000000000000000000000000000000..cc370364413a22092bb7a397a05031dfd0f6f61b --- /dev/null +++ b/testout/b.html @@ -0,0 +1,19 @@ + + + + + The Letter B + + +
    +

    The Letter B

    +

    preceeded by the letter a.

    +
    +

    See Also:

    +
    + + + diff --git a/testsrc/a.gls b/testsrc/a.gls new file mode 100644 index 0000000000000000000000000000000000000000..d2e5722120fff5bb5a8dc1fad8c8431cc2b719a0 --- /dev/null +++ b/testsrc/a.gls @@ -0,0 +1,13 @@ +The Letter A + +this is a +paragraph block. these lines will be folded into one +string and ultimately rendered roughly +; differently +the same in the browser. + +> this is a quote block +~aleteoryx + +*** +b diff --git a/testsrc/b.gls b/testsrc/b.gls new file mode 100644 index 0000000000000000000000000000000000000000..870c14e47c84e6e331613cd4304dfe45508cb64b --- /dev/null +++ b/testsrc/b.gls @@ -0,0 +1,6 @@ +The Letter B + +preceeded by the letter a. + +*** +a