From 0bb975bd8ac8ecbe5e94e8387d358f62f92fbffc Mon Sep 17 00:00:00 2001 From: Aleteoryx Date: Tue, 16 Sep 2025 01:02:52 -0400 Subject: [PATCH] grep --- grep.tal | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ mkfile | 3 +- 2 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 grep.tal diff --git a/grep.tal b/grep.tal new file mode 100644 index 0000000000000000000000000000000000000000..141017771e594cfc0c9f81c36eed6ea21dcb4b2b --- /dev/null +++ b/grep.tal @@ -0,0 +1,124 @@ +( grep.tal + by aleteoryx, all rights released + + an implementation of the bezza-yates-gonet "shift-or" bitlap + search algorithm, for fixed search patterns up to 16 bytes ) + + +( --- initialization --- ) + +|100 +@on-reset ( -> ) + ;meta #06 DEO2 + ;args #10 DEO2 + + #0017 DEI EQU ?usage ( die if no args ) + ;lut + ( >> ) +@fill-lut ( ptr* -> ) + #ffff OVR2 STA2 ( write ffff to LUT ) + INC2 INC2 ( offset ) + DUP2 ;lut/end NEQ2 ?fill-lut ( loop if not at end ) + POP2 + BRK +@meta + "grep.rom 0a + "aleteoryx, 20 "all 20 "rights 20 "released 0a0a + + "bezza-yates-gonet 20 ""bitlap" 20 "text 20 "search 0a00 + +@args ( -> ) + #17 DEI + #02 DUP2 NEQ ?{ POP2 !lut/in } ( argument byte, parse ) + INC EQU ?usage ( only accept one arg ) + + ( must be 04 ) + + ( we need to shift state up 10 - lut/len bits before orring it + with match, this is where that's calculated. ) + ;lut/len LDA2 SUB ( the high byte will be 00 ) + ;bitlap/sft STA + + ;bitlap/in #10 DEO2 + [ LITr 80 ] #ffff ( prep the stack for bitlap/in ) + BRK + + +( --- printing --- ) + +@usage/str "usage: 20 " 20 "| 20 "grep.rom 20 "PATTERN 0a +@usage ( -- ) + #01 ;/str !die +@toolong/str "fatal: 20 "pattern 20 "must 20 "be 20 "<16 20 "bytes. 0a +@toolong ( -- ) + #02 ;/str ( >> ) +@die ( code str* -- ) + &loop + LDAk DUP #19 DEO ( get and write char ) + STH INC2 STHr ( increment str* ) + #0a NEQ ?/loop ( loop if char != 0a ) + POP2 #0f DEO BRK ( exit with status XX ) + + +( --- check for a match --- ) + +@bitlap/in ( state* `match -> state* `match ) + ( handle the line buffer ) + #0a12 DEI DUP ( get char, save a byte later ) + [ LIT2 &ptr =/line ] ( line pointer ) + STH2k STA STH2r ( save byte in buffer ) + INC2 ,/ptr STR2 ( save new pointer ) + EQU ?/newline ( reset state on newline ) + + ( shift-or ) + #10 SFT2 ( SHIFT in a 0 ) + lut/getaddr LDA2 ORA2 ( OR with LUT value ) + + ( matches are stored in the high bit of the top byte of the + return stack. if the high bit is low [i.e. we got a match], + this will set match to 00, otherwise it will stay 01. ) + DUP2 [ LIT &sft $1 ] SFT2 + POP STH ANDr BRK + +@bitlap/newline ( state* `match -> state* `match ) + ;/line ,/ptr STR2 ( reset line ptr ) + POP2 #ffff ( reset state ) + STHr [ LITr 80 ] ( reset match ) + ?{ ( we saw a match, print the line ) + ;/line + &loop + LDAk DUP #18 DEO ( get and write char ) + STH INC2 STHr ( increment str* ) + #0a NEQ ?/loop ( loop if char != 0a ) + POP2 + } BRK + + +( --- look-up table --- ) + +@lut/in ( -> ) + ( first, compute the LUT bit ) + #0001 ( gets shifted later ) + [ LIT2 &len 0000 ] ( how far into the pattern we are ) + STH2k ( save a byte later ) + SWP ?toolong ( die, pattern is > 16 bytes ) + SFT2 #ffff EOR2 ( nth bit is 0, all others are 1 ) + + ( next, write it into the LUT ) + /getaddr + STH2k LDA2 AND2 ( compute new value for slot ) + STH2r STA2 ( write new value ) + + ( last, update the counter ) + STH2r #0010 ADD2 ( calculate new counter value ) + ,/len STR2 ( save it ) + BRK + +@lut/getaddr ( -- addr* ) + #0012 DEI ( input byte, prefixed with 00 ) + #10 SFT2 ;lut ADD2 ( offset in lookup table ) + JMP2r + + +@lut ( ... ) $200 ( ... ) @lut/end +@bitlap/line ( the incoming line gets written here ) diff --git a/mkfile b/mkfile index d2080606486647163dfc947f3100d4da961204bc..ef45edbc1879f3d287a8376e6d5dcad84de39895 100644 --- a/mkfile +++ b/mkfile @@ -2,7 +2,8 @@ roms=\ arvelie.rom\ nanpa.rom\ bfc.rom\ - lzw.rom + lzw.rom\ + grep.rom all:VQ: $roms