( grep.tal by aleteoryx, all rights released an implementation of the bezza-yates-gonet "shift-or" bitap search algorithm, for fixed search patterns up to 16 bytes ) %dbg { #010e DEO } %kil { #010f DEO BRK } %p2brk { POP2r BRK } ( --- initialization --- ) |100 @on-reset ( -> ) ;meta #06 DEO2 ;args/checkdash #10 DEO2 #0017 DEI EQU ?usage ( die if no args ) ;lut ( >> ) @fill-lut ( ptr* -> ) #ffff OVR2 STA2 ( write ffff to LUT ) INC2 INC2 ( offset ) DUP2 ;lut/end NEQ2 ?fill-lut ( loop if not at end ) POP2 BRK @meta "grep.rom 0a "aleteoryx, 20 "all 20 "rights 20 "released 0a0a "bezza-yates-gonet 20 ""bitap" 20 "text 20 "search 0a00 @args/checkdash ( -> ) [ LIT2 "- 12 ] DEI NEQ ?{ ;args/optstr #10 DEO2 BRK } ;args/lastarg #10 DEO2 ( >> ) @args/lastarg ( -> ) #17 DEI #02 DUP2 NEQ ?{ POP2 !lut/in } ( argument byte, parse ) INC EQU ?usage ( only accept one string arg ) ( must be 04, end of args ) ( we need to shift state up 10 - lut/len bits before orring it with match, this is where that's calculated. ) ;lut/len LDA2 SUB ( the high byte will be 00 ) ;bitap/sft STA ;bitap/in #10 DEO2 [ LITr 80 ] #ffff ( prep the stack for bitap/in ) BRK @args/optstr ( -> ) #17 DEI #03 DUP2 NEQ ?{ ( end of -args? ) POP2 ;args/lastarg #10 DEO2 BRK } INC DUP2 EQU ?usage ( we can't be the only arg ) POP2 #12 DEIk LIT "F NEQ ?{ POP BRK } ( [F]ixed mode -- do nothing ) DEIk LIT "- NEQ ?{ POP BRK } ( for '--' arg ) DEIk LIT "P NEQ ?{ POP ( [P]attern mode ) ;lut/in-pattern ;lut/parser-jmp STA2 BRK } DEIk LIT "o NEQ ?{ POP BRK } DEI ,unknopt/opt STR !unknopt ( --- printing --- ) @usage/str "usage: 20 " 20 "| 20 "grep.rom 20 "[-FPo 20 "| 20 "--] 20 "PATTERN 0a @usage ( -- ) #01 ;/str !die @unknopt/str "fatal: 20 "unknown 20 "option 20 "- &opt $1 0a @unknopt ( -- ) #01 ;/str !die @toolong/str "fatal: 20 "pattern 20 "must 20 "be 20 "<16 20 "bytes. 0a @toolong ( -- ) #02 ;/str ( >> ) @die ( code str* -- ) &loop LDAk DUP #19 DEO ( get and write char ) STH INC2 STHr ( increment str* ) #0a NEQ ?/loop ( loop if char != 0a ) POP2 #0f DEO BRK ( exit with status XX ) ( --- check for a match --- ) @bitap/in ( state* `match -> state* `match ) ( handle the line buffer ) #0a12 DEI DUP ( get char, save a byte later ) [ LIT2 &ptr =/line ] ( line pointer ) STH2k STA STH2r ( save byte in buffer ) INC2 ,/ptr STR2 ( save new pointer ) EQU ?/newline ( reset state on newline ) ( shift-or ) #10 SFT2 ( SHIFT in a 0 ) lut/get-addr LDA2 ORA2 ( OR with LUT value ) ( matches are stored in the high bit of the top byte of the return stack. if the high bit is low [i.e. we got a match], this will set match to 00, otherwise it will stay 01. ) DUP2 [ LIT &sft $1 ] SFT2 POP STH ANDr BRK @bitap/newline ( state* `match -> state* `match ) ;/line ,/ptr STR2 ( reset line ptr ) POP2 #ffff ( reset state ) STHr [ LITr 80 ] ( reset match ) ?{ ( we saw a match, print the line ) ;/line &loop LDAk DUP #18 DEO ( get and write char ) STH INC2 STHr ( increment str* ) #0a NEQ ?/loop ( loop if char != 0a ) POP2 } BRK ( --- look-up table generation & pattern parsing --- ) @lut/in-fixed ( -- ) /get-addr LDA2k ( get current mask ) /get-mask AND2 ( compute new value for slot ) SWP2 STA2 ( write new mask ) JMP2r @lut/in-wildcard ( -- ) /get-mask [ LIT2r =lut/end LIT2r =lut ] ( mask* `end* `addr* ) &wc-loop DUP2 STH2rk LDA2 AND2 ( calculate new mask ) STH2rk STA2 ( save it ) INC2r INC2r ( inc. addr ) NEQ2rk STHr ?/wc-loop ( loop ) POP2r POP2r POP2 JMP2r @lut/in-class1 ( -- ) ;/in-class ,/parser-jmp STR2 ( only do this once ) LIT2 "- 12 DEI NEQ ?{ ( leading dash ) /in-fixed p2brk } ( >> ) @lut/in-class ( -- ) #12 DEIk LIT "] NEQ ?{ POP ( end of class ) ;/in-pattern ,/parser-jmp STR2 JMP2r } DEIk LIT "- NEQ ?{ POP ( range section ) ;/in-class-range ,/parser-jmp STR2 ,/lookbehind LDR ( last char on stack ) p2brk } DEIk LIT "\ NEQ ?{ POP ( backslash escape ) ;/in-class-bs ,/parser-jmp STR2 p2brk } POP /in-fixed p2brk @lut/in-class-bs ( -- ) ;/in-class ,/parser-jmp STR2 ( reset the escape ) /in-fixed p2brk ( placed here for STR purposes ) @lut/in ( -> ) ( first, store the char ) [ LIT2 &lookbehind $2 ] NIP #12 DEI ,/lookbehind STR2 ( then, update the pattern however ) [ LIT2 &parser-jmp =/in-fixed ] JSR2 ( then update the bitmask ) ,/len LDR2 ( get the length ) #0010 ADD2 ( calculate new counter value ) ,/len STR2 ( save it ) BRK @lut/get-mask ( -- mask* ) #0001 ( gets shifted later ) [ LIT2 &len 0000 ] ( how far into the pattern we are ) SWP ?toolong ( die, pattern is > 16 bytes ) SFT2 #ffff EOR2 ( nth bit is 0, all others are 1 ) JMP2r @lut/in-pattern ( -- ) #12 DEIk LIT "\ NEQ ?{ POP ( backslash escape ) ;/in-bs ,/parser-jmp STR2 p2brk } ( don't shift mask ) DEIk LIT "[ NEQ ?{ POP ( character class ) ;/in-class1 ,/parser-jmp STR2 p2brk } ( don't shift mask ) DEIk LIT "? NEQ ?{ POP !/in-wildcard } ( will allow anything ) POP !/in-fixed ( non-special ) @lut/in-bs ( -- ) ;/in-pattern ,/parser-jmp STR2 ( reset the escape ) !/in-fixed @lut/in-class-range ( char1 -- ) #00 SWP #10 SFT2 ( 2*char1* ) ;lut ADD2 ( start* ) #12 DEI ( start* char2 ) #00 SWP #10 SFT2 ( start* 2*char2* ) ;lut ADD2 INC2 INC2 ( start* end* ) STH2 STH2 /get-mask ( `end* `start* mask* ) &range-loop DUP2 STH2rk LDA2 AND2 STH2rk STA2 INC2r INC2r NEQ2rk dbg STHr ?/range-loop POP2r POP2r POP2 ;/in-class ;/parser-jmp STA2 p2brk @lut/get-addr ( -- addr* ) #0012 DEI ( input byte, prefixed with 00 ) #10 SFT2 ;lut ADD2 ( offset in lookup table ) JMP2r |1000 @lut ( ... ) $200 ( ... ) @lut/end @bitap/line ( the incoming line gets written here )