From 75a86f01e5401c085da1dac19dd456dded611136 Mon Sep 17 00:00:00 2001 From: Aleteoryx Date: Mon, 8 Sep 2025 18:34:46 -0400 Subject: [PATCH] add linkage metrics --- gloss/modes/glossary.py | 49 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/gloss/modes/glossary.py b/gloss/modes/glossary.py index aa29f832e2945d3c154256946e01e278c5c8fee4..340456d6395422093a469bcdcdf407680702dda8 100755 --- a/gloss/modes/glossary.py +++ b/gloss/modes/glossary.py @@ -43,6 +43,10 @@ class GlsFile: names: Set[str] blocks: List[Block] see_also: Optional[List[str]] + links: Set['GlsFile'] + + def __hash__(self): + return id(self) ### PARSING ### @@ -113,7 +117,7 @@ def first_pass(slug, fp): elif block_type is not None: blocks.append(Block(block_type, block_text, block_meta)) - return GlsFile(slug, title, [*names], blocks, see_also) + return GlsFile(slug, title, [*names], blocks, see_also, set()) quote_pat = re.compile("((?:(?!@@)(?!//).)*)(?:@@((?:(?!//).)+))?(?://(.+))?") @@ -169,10 +173,14 @@ def gen_inner_html(fmt, file, idx): if m is None: continue + other = idx.by_name[name] + blacklist.update(other.names) + file.links.add(other) + s, e = m.span() block.text.pop(i) block.text.insert(i, seg[e:]) - block.text.insert(i, (f'{seg[s:e]}',)) + block.text.insert(i, (f'{seg[s:e]}',)) block.text.insert(i, seg[:s]) break @@ -212,11 +220,14 @@ def gen_inner_html(fmt, file, idx): href = page.slug name = page.title else: + page = None if '|' in what: href, name = what.split('|', 1) else: href = name = what see_also += f"\n\t
  • {html.escape(name)}
  • " + if page is not None: + file.links.add(page) see_also += "\n" else: see_also = "" @@ -224,6 +235,32 @@ def gen_inner_html(fmt, file, idx): return content, see_also +### METRICS ### + +def remove_island(root, nodes): + to_check = [root] + for node in to_check: + if node not in nodes: + continue + + to_check.extend(node.links) + nodes.remove(node) + +def count_orphans(root, nodes): + nodes = set(nodes) + + remove_island(root, nodes) + num_orphans = len(nodes) + orphans = [*nodes] + + num_islands = 0 + while len(nodes) > 0: + remove_island(next(iter(nodes)), nodes) + num_islands += 1 + + return num_orphans, num_islands, orphans + + ### ENTRYPOINT ### def gloss(sfmt, srcdir, outdir): @@ -270,5 +307,13 @@ def gloss(sfmt, srcdir, outdir): 'modtime': datetime.fromtimestamp(stat(f'{srcdir}/{file.slug}.gls').st_mtime) } fp.write(template.format(**ctx)) + + num_orphans, num_islands, orphans = count_orphans(indexes.by_slug['index'], files) print(sfmt.format('###', f'generated {len(files)} entries')) + if num_orphans != 0: + pl_entry = 'entry' if num_orphans == 1 else 'entries' + pl_island = 'island' if num_islands == 1 else 'islands' + pl_orphan = 'orphan' if num_orphans == 1 else 'orphans' + print(sfmt.format('!!!', f'{num_orphans} orphan {pl_entry} across {num_islands} {pl_island}')) + print(sfmt.format('!!!', f'{pl_orphan}: {", ".join(x.title for x in orphans)}'))