@@ 5,12 5,16 @@
search algorithm, for fixed search patterns up to 16 bytes )
+%dbg { #010e DEO }
+%kil { #010f DEO BRK }
+%p2brk { POP2r BRK }
+
( --- initialization --- )
|100
@on-reset ( -> )
;meta #06 DEO2
- ;args #10 DEO2
+ ;args/checkdash #10 DEO2
#0017 DEI EQU ?usage ( die if no args )
;lut
@@ 27,12 31,19 @@
"bezza-yates-gonet 20 ""bitap" 20 "text 20 "search 0a00
-@args ( -> )
+@args/checkdash ( -> )
+ [ LIT2 "- 12 ] DEI NEQ ?{
+ ;args/optstr #10 DEO2 BRK
+ }
+ ;args/lastarg #10 DEO2
+ ( >> )
+
+@args/lastarg ( -> )
#17 DEI
#02 DUP2 NEQ ?{ POP2 !lut/in } ( argument byte, parse )
- INC EQU ?usage ( only accept one arg )
+ INC EQU ?usage ( only accept one string arg )
- ( must be 04 )
+ ( must be 04, end of args )
( we need to shift state up 10 - lut/len bits before orring it
with match, this is where that's calculated. )
@@ 42,13 53,35 @@
;bitap/in #10 DEO2
[ LITr 80 ] #ffff ( prep the stack for bitap/in )
BRK
+
+@args/optstr ( -> )
+ #17 DEI
+ #03 DUP2 NEQ ?{ ( end of -args? )
+ POP2 ;args/lastarg #10 DEO2 BRK
+ }
+ INC DUP2 EQU ?usage ( we can't be the only arg )
+ POP2
+ #12
+ DEIk LIT "F NEQ ?{ POP BRK } ( [F]ixed mode -- do nothing )
+ DEIk LIT "- NEQ ?{ POP BRK } ( for '--' arg )
+
+ DEIk LIT "P NEQ ?{ POP ( [P]attern mode )
+ ;lut/in-pattern ;lut/parser-jmp STA2
+ BRK }
+ DEIk LIT "o NEQ ?{ POP BRK }
+
+ DEI ,unknopt/opt STR !unknopt
( --- printing --- )
-@usage/str "usage: 20 "<program> 20 "| 20 "grep.rom 20 "PATTERN 0a
+@usage/str "usage: 20 "<program> 20 "| 20
+ "grep.rom 20 "[-FPo 20 "| 20 "--] 20 "PATTERN 0a
@usage ( -- )
#01 ;/str !die
+@unknopt/str "fatal: 20 "unknown 20 "option 20 "- &opt $1 0a
+@unknopt ( -- )
+ #01 ;/str !die
@toolong/str "fatal: 20 "pattern 20 "must 20 "be 20 "<16 20 "bytes. 0a
@toolong ( -- )
#02 ;/str ( >> )
@@ 72,7 105,7 @@
( shift-or )
#10 SFT2 ( SHIFT in a 0 )
- lut/getaddr LDA2 ORA2 ( OR with LUT value )
+ lut/get-addr LDA2 ORA2 ( OR with LUT value )
( matches are stored in the high bit of the top byte of the
return stack. if the high bit is low [i.e. we got a match],
@@ 94,31 127,111 @@
} BRK
-( --- look-up table --- )
+( --- look-up table generation & pattern parsing --- )
+
+@lut/in-fixed ( -- )
+ /get-addr LDA2k ( get current mask )
+ /get-mask AND2 ( compute new value for slot )
+ SWP2 STA2 ( write new mask )
+ JMP2r
+
+@lut/in-wildcard ( -- )
+ /get-mask [ LIT2r =lut/end LIT2r =lut ] ( mask* `end* `addr* )
+ &wc-loop
+ DUP2 STH2rk LDA2 AND2 ( calculate new mask )
+ STH2rk STA2 ( save it )
+ INC2r INC2r ( inc. addr )
+ NEQ2rk STHr ?/wc-loop ( loop )
+ POP2r POP2r POP2
+ JMP2r
+
+@lut/in-class1 ( -- )
+ ;/in-class ,/parser-jmp STR2 ( only do this once )
+ LIT2 "- 12 DEI NEQ ?{ ( leading dash )
+ /in-fixed
+ p2brk }
+ ( >> )
+@lut/in-class ( -- )
+ #12
+ DEIk LIT "] NEQ ?{ POP ( end of class )
+ ;/in-pattern ,/parser-jmp STR2
+ JMP2r }
+ DEIk LIT "- NEQ ?{ POP ( range section )
+ ;/in-class-range ,/parser-jmp STR2
+ ,/lookbehind LDR ( last char on stack )
+ p2brk }
+ DEIk LIT "\ NEQ ?{ POP ( backslash escape )
+ ;/in-class-bs ,/parser-jmp STR2
+ p2brk }
+
+ POP /in-fixed p2brk
+
+@lut/in-class-bs ( -- )
+ ;/in-class ,/parser-jmp STR2 ( reset the escape )
+ /in-fixed p2brk
+
+
+( placed here for STR purposes )
@lut/in ( -> )
- ( first, compute the LUT bit )
+ ( first, store the char )
+ [ LIT2 &lookbehind $2 ]
+ NIP #12 DEI ,/lookbehind STR2
+
+ ( then, update the pattern however )
+ [ LIT2 &parser-jmp =/in-fixed ] JSR2
+
+ ( then update the bitmask )
+ ,/len LDR2 ( get the length )
+ #0010 ADD2 ( calculate new counter value )
+ ,/len STR2 ( save it )
+ BRK
+
+@lut/get-mask ( -- mask* )
#0001 ( gets shifted later )
[ LIT2 &len 0000 ] ( how far into the pattern we are )
- STH2k ( save a byte later )
SWP ?toolong ( die, pattern is > 16 bytes )
SFT2 #ffff EOR2 ( nth bit is 0, all others are 1 )
-
- ( next, write it into the LUT )
- /getaddr
- STH2k LDA2 AND2 ( compute new value for slot )
- STH2r STA2 ( write new value )
-
- ( last, update the counter )
- STH2r #0010 ADD2 ( calculate new counter value )
- ,/len STR2 ( save it )
- BRK
+ JMP2r
-@lut/getaddr ( -- addr* )
+@lut/in-pattern ( -- )
+ #12
+ DEIk LIT "\ NEQ ?{ POP ( backslash escape )
+ ;/in-bs ,/parser-jmp STR2
+ p2brk } ( don't shift mask )
+ DEIk LIT "[ NEQ ?{ POP ( character class )
+ ;/in-class1 ,/parser-jmp STR2
+ p2brk } ( don't shift mask )
+
+ DEIk LIT "? NEQ ?{ POP !/in-wildcard } ( will allow anything )
+
+ POP !/in-fixed ( non-special )
+@lut/in-bs ( -- )
+ ;/in-pattern ,/parser-jmp STR2 ( reset the escape )
+ !/in-fixed
+
+@lut/in-class-range ( char1 -- )
+ #00 SWP #10 SFT2 ( 2*char1* )
+ ;lut ADD2 ( start* )
+ #12 DEI ( start* char2 )
+ #00 SWP #10 SFT2 ( start* 2*char2* )
+ ;lut ADD2 INC2 INC2 ( start* end* )
+ STH2 STH2 /get-mask ( `end* `start* mask* )
+ &range-loop
+ DUP2 STH2rk LDA2 AND2
+ STH2rk STA2
+ INC2r INC2r
+ NEQ2rk dbg STHr ?/range-loop
+ POP2r POP2r POP2
+ ;/in-class ;/parser-jmp STA2
+ p2brk
+
+
+@lut/get-addr ( -- addr* )
#0012 DEI ( input byte, prefixed with 00 )
#10 SFT2 ;lut ADD2 ( offset in lookup table )
JMP2r
-
+|1000
@lut ( ... ) $200 ( ... ) @lut/end
@bitap/line ( the incoming line gets written here )