From ed42056faa25b1a8d12605ddb112e3c90fa91a28 Mon Sep 17 00:00:00 2001
From: Aleteoryx <alyx@aleteoryx.me>
Date: Fri, 15 Aug 2025 00:22:06 -0400
Subject: [PATCH] blam

---
 gloss.py       | 240 +++++++++++++++++++++++++++++++++++++++++++++++++
 testout/a.html |  23 +++++
 testout/b.html |  19 ++++
 testsrc/a.gls  |  13 +++
 testsrc/b.gls  |   6 ++
 5 files changed, 301 insertions(+)
 create mode 100755 gloss.py
 create mode 100644 testout/a.html
 create mode 100644 testout/b.html
 create mode 100644 testsrc/a.gls
 create mode 100644 testsrc/b.gls
diff --git a/gloss.py b/gloss.py
new file mode 100755
index 0000000000000000000000000000000000000000..7708f1b7dd3658792ec68a91dab39a8ad0f5fb1b
--- /dev/null
+++ b/gloss.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+
+import html
+import re
+from datetime import datetime
+from os import stat
+from glob import glob
+from typing import List, Optional, Union, Dict, Set, Tuple
+from dataclasses import dataclass
+from sys import argv, stderr, exit
+
+usage = f'''
+usage: {argv[0]} <SRCDIR> <OUTDIR>
+
+For each .gls file in SRCDIR, creates a matching .html file in OUTDIR.
+
+If SRCDIR/template.html exists, it will be used. Templates are
+interpolated as in str.format, and the following keys are provided:
+
+	{{title}} - The first name for an article.
+	{{slug}} - The name of the output file, minus '.html'.
+	{{body}} - The HTML content of an article.
+	{{modtime}} - The datetime of the article's last edit.
+'''[1:]	# [1:] drops the newline that follows the opening quotes
+
+def die(why, code=1):
+	print(why, file=stderr)		# the reason goes to stderr...
+	raise SystemExit(code)		# ...then the process ends with status `code`
+
+
+### TYPES ###
+
+@dataclass
+class Block:
+	ty: str					# 'para' or 'quote'
+	text: List[Union[str, Tuple[str]]]	# segments: str = escaped text, 1-tuple = finished HTML
+	meta: str				# text of the block's '~' line, '' if it had none
+
+@dataclass
+class GlsFile:
+	slug: str				# source file name minus '.gls'
+	names: List[str]			# article names; names[0] is used as the title
+	blocks: List[Block]
+	see_also: Optional[List[str]]		# slugs listed after '***'; None without that section
+
+
+### PARSING ###
+
+def readlines(fp):
+	# Yields the stripped lines of fp, minus ';' comment lines.  The result is
+	# deliberately a one-shot iterator rather than a list: first_pass() picks
+	# up in each loop where the previous one stopped.
+	stripped = (raw.strip() for raw in fp.readlines())
+	return (ln for ln in stripped if not ln.startswith(';'))
+
+def first_pass(slug, fp):
+	# Parse one .gls stream: names section, then blocks, then an optional '***'
+	# see-also section.  Returns a GlsFile, or None (after a warning on stderr)
+	# if no names were found.
+	lines = readlines(fp)
+
+	names = {}	# dict used as an insertion-ordered set, so names[0] is the first name
+	for name in lines:
+		if name:
+			names[name] = None
+		elif names:						# section end
+			break
+
+	if not names:
+		print(f'Empty file: {slug}.gls', file=stderr)
+		return None
+
+	do_see_also = False
+	block_type, block_text, block_meta = None, None, ''
+	blocks = []
+	for line in lines:
+		if len(line) == 0 and block_type is not None:		# block end
+			blocks.append(Block(block_type, [html.escape(block_text)], block_meta))
+			block_type, block_text, block_meta = None, None, ''
+			continue
+		if line.startswith('***') and block_type is None:	# see also section
+			do_see_also = True
+			break
+
+		if line.startswith('~'):
+			block_meta = line[1:].strip()
+		elif block_type is None:
+			if len(line) == 0:
+				continue
+			if line.startswith('>'):
+				block_type = 'quote'
+				block_text = line[1:].strip()
+				continue
+
+			if line.startswith("\\"):	# dropped, so a paragraph may open with a marker character
+				line = line[1:]
+			block_type = 'para'
+			block_text = line
+		elif block_type == 'quote':
+			if line.startswith('>'):
+				line = line[1:].strip()
+			block_text += "\n" + line
+		elif block_type == 'para':
+			block_text += ' ' + line
+	if block_type is not None:	# input ended inside a block: keep it instead of dropping it
+		blocks.append(Block(block_type, [html.escape(block_text)], block_meta))
+
+	see_also = [*filter(len, lines)] if do_see_also else None
+
+	return GlsFile(slug, [*names], blocks, see_also)
+
+quote_pat = re.compile("((?:(?!@@)(?!//).)+)(?:@@((?:(?!//).)+))?(?://(.+))?")	# '~' meta: source[@@date][//url]
+
+
+### GENERATION ###
+	
+@dataclass(init=False)
+class Indexes:
+	by_slug: Dict[str, GlsFile]			# slug -> file
+	by_name: Dict[str, GlsFile]			# every article name -> the file defining it
+	names_sorted: List[Tuple[str, re.Pattern]]	# (name, whole-word matcher), longest name first
+
+	def __init__(self, files):
+		# Builds all three indexes; die()s if two files define the same name.
+		self.by_slug = {}
+		self.by_name = {}
+		self.names_sorted = []
+		for file in files:
+			self.by_slug[file.slug] = file
+			for name in file.names:
+				if name in self.by_name:
+					die(f'Redefinition of "{name}": occurs in {self.by_name[name].slug}.gls and {file.slug}.gls')
+				self.by_name[name] = file
+				# Block text is HTML-escaped before it is searched, so escape the name the same way.
+				pattern = re.compile(f"((?<=\\W)|^){re.escape(html.escape(name))}((?=\\W)|$)", re.IGNORECASE)
+				self.names_sorted.append((name, pattern))
+		# Longest names first, so a short name cannot claim text inside a longer one.
+		self.names_sorted.sort(key=lambda x: len(x[0]), reverse=True)
+			
+def gen_inner_html(file, idx):
+	# Returns the HTML for {body}: <h1> with the first name, one element per
+	# block, then the see-also list.  Mentions of other articles' names are
+	# linked first, which rewrites block.text in place: str segments are still
+	# searchable, 1-tuples hold finished HTML that must not be matched again.
+	blacklist = set()	# NOTE(review): nothing ever adds to this, so the check below never fires
+	for name, pat in idx.names_sorted:	# populate local links
+		if name in file.names:		# ...but not to the same file
+			continue
+		if name in blacklist:		# ...and don't repeat!
+			continue
+		for block in file.blocks:
+			for i, seg in enumerate(block.text):
+				m = pat.search(seg) if isinstance(seg, str) else None
+				if m is None:
+					continue
+				s, e = m.span()		# first hit in this block: split the segment around it
+				link = f'<a href="{html.escape(idx.by_name[name].slug)}.html">{seg[s:e]}</a>'
+				block.text[i:i+1] = [seg[:s], (link,), seg[e:]]
+				break
+
+	content = f"<h1>{html.escape(file.names[0])}</h1>"
+	for block in file.blocks:		# ok generate the html
+		text = ''.join(x if isinstance(x, str) else x[0] for x in block.text)
+		if block.ty == 'para':
+			content += f"\n<p>{text}</p>"
+		elif block.ty == 'quote':
+			# meta is "source[@@date][//url]"; with no usable '~' line there is
+			# no match and the quote is rendered without attribution
+			m = quote_pat.match(block.meta)
+			sauce, date, url = (
+				x if x is None else html.escape(x)
+				for x in (m.groups() if m else (None, None, None)) )
+
+			content += "\n<div>\n\t<blockquote" + ('' if url is None else f' cite="{url}"') + '>'
+			content += text + '</blockquote>'
+
+			if sauce is not None:
+				content += "\n\t<p>&mdash; <cite>" + (sauce if url is None else f'<a href="{url}">{sauce}</a>') + "</cite>"
+				if date is not None:
+					content += f', {date}'
+				content += '</p>'
+			content +='\n</div>'
+
+	if file.see_also is not None and len(file.see_also) != 0:
+		content += "\n<hr>\n<h3>See Also:</h3>\n<ul>"
+		for slug in file.see_also:
+			name = idx.by_slug[slug].names[0]
+			content += f"\n\t<li><a href=\"{html.escape(slug)}.html\">{html.escape(name)}</a></li>"
+		content += "\n</ul>"
+
+	return content
+
+
+### ENTRYPOINT ###
+
+if __name__ == '__main__':
+	argv = argv[1:]
+	if len(argv) != 2 or argv[0].startswith('-') or argv[1].startswith('-'):	# also safe for ''
+		die(usage)
+
+	srcdir = argv[0]
+	outdir = argv[1]
+
+	try:	# SRCDIR/template.html is optional; only its absence selects the built-in page
+		with open(f'{srcdir}/template.html', 'rt', encoding='utf-8') as fp:
+			template = fp.read()
+	except FileNotFoundError:
+		template = \
+'''<!doctype html>
+<html>
+<head>
+	<meta charset='utf-8'>
+	<title>{title}</title>
+</head>
+<body>
+<main>
+{body}
+</main>
+<footer>File last modified {modtime:%a, %Y-%m-%d %H:%M:%S %Z}</footer>
+</body>
+</html>
+'''
+
+	files = []
+	for fn in sorted(glob('*.gls', root_dir=srcdir)):	# sorted: same processing order on every run
+		with open(f'{srcdir}/{fn}', 'rt', encoding='utf-8') as fp:
+			data = first_pass(fn[:-4], fp)
+			if data is not None:
+				files.append(data)
+
+	indexes = Indexes(files)
+	for file in files:
+		with open(f'{outdir}/{file.slug}.html', 'wt', encoding='utf-8') as fp:
+			ctx = {
+				'title': html.escape(file.names[0]),
+				'slug': html.escape(file.slug),
+				'body': gen_inner_html(file, indexes),
+				'modtime': datetime.fromtimestamp(stat(f'{srcdir}/{file.slug}.gls').st_mtime).astimezone()	# aware, so %Z prints a zone
+			}
+			fp.write(template.format(**ctx))
diff --git a/testout/a.html b/testout/a.html
new file mode 100644
index 0000000000000000000000000000000000000000..688ecfccb13ff1b5e19a02d76dd6c21e8f30d62c
--- /dev/null
+++ b/testout/a.html
@@ -0,0 +1,23 @@
+<!doctype html>
+<html>
+<head>
+	<meta encoding='utf-8'>
+	<title>The Letter A</title>
+</head>
+<body>
+<main>
+<h1>The Letter A</h1>
+<p>this is a paragraph block. these lines will be folded into one string and ultimately rendered roughly the same in the browser.</p>
+<div>
+	<blockquote>this is a quote block</blockquote>
+	<p>&mdash; <cite>aleteoryx</cite></p>
+</div>
+<hr>
+<h3>See Also:</h3>
+<ul>
+	<li><a href="b.html">The Letter B</a></li>
+</li>
+</main>
+<footer>File last modified Thu, 2025-14-40 18:40:06 </footer>
+</body>
+</html>
diff --git a/testout/b.html b/testout/b.html
new file mode 100644
index 0000000000000000000000000000000000000000..cc370364413a22092bb7a397a05031dfd0f6f61b
--- /dev/null
+++ b/testout/b.html
@@ -0,0 +1,19 @@
+<!doctype html>
+<html>
+<head>
+	<meta encoding='utf-8'>
+	<title>The Letter B</title>
+</head>
+<body>
+<main>
+<h1>The Letter B</h1>
+<p>preceeded by <a href="a.html">the letter a</a>.</p>
+<hr>
+<h3>See Also:</h3>
+<ul>
+	<li><a href="a.html">The Letter A</a></li>
+</li>
+</main>
+<footer>File last modified Thu, 2025-14-40 18:40:52 </footer>
+</body>
+</html>
diff --git a/testsrc/a.gls b/testsrc/a.gls
new file mode 100644
index 0000000000000000000000000000000000000000..d2e5722120fff5bb5a8dc1fad8c8431cc2b719a0
--- /dev/null
+++ b/testsrc/a.gls
@@ -0,0 +1,13 @@
+The Letter A
+
+this is a
+paragraph block. these lines will be folded into one
+string and ultimately rendered roughly
+; differently
+the same in the browser.
+
+> this is a quote block
+~aleteoryx
+
+***
+b
diff --git a/testsrc/b.gls b/testsrc/b.gls
new file mode 100644
index 0000000000000000000000000000000000000000..870c14e47c84e6e331613cd4304dfe45508cb64b
--- /dev/null
+++ b/testsrc/b.gls
@@ -0,0 +1,6 @@
+The Letter B
+
+preceeded by the letter a.
+
+***
+a