~aleteoryx/uxn

ref: 0bb975bd8ac8ecbe5e94e8387d358f62f92fbffc uxn/grep.tal -rw-r--r-- 3.1 KiB
0bb975bdAleteoryx grep 2 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
(	grep.tal
	by aleteoryx, all rights released
	
	an implementation of the bezza-yates-gonet "shift-or" bitlap
	search algorithm, for fixed search patterns up to 16 bytes )


( --- initialization --- )

|100
@on-reset ( -> )
	;meta #06 DEO2
	;args #10 DEO2

	#0017 DEI EQU ?usage	( die if no args )
	;lut
	( >> )
@fill-lut ( ptr* -> )
	#ffff OVR2 STA2			( write ffff to LUT )
	INC2 INC2			( offset )
	DUP2 ;lut/end NEQ2 ?fill-lut	( loop if not at end )
	POP2
	BRK
@meta
	"grep.rom 0a
	"aleteoryx, 20 "all 20 "rights 20 "released 0a0a
	
	"bezza-yates-gonet 20 ""bitlap" 20 "text 20 "search 0a00

@args ( -> )
	#17 DEI
	#02 DUP2 NEQ ?{ POP2 !lut/in }	( argument byte, parse )
	INC      EQU ?usage		( only accept one arg )	
	
	( must be 04 )
	
	( we need to shift state up 10 - lut/len bits before orring it
	  with match, this is where that's calculated. )
	;lut/len LDA2 SUB 	( the high byte will be 00 )
	;bitlap/sft STA
	
	;bitlap/in #10 DEO2
	[ LITr 80 ] #ffff	( prep the stack for bitlap/in )
	BRK
	

( --- printing --- )

@usage/str "usage: 20 "<program> 20 "| 20 "grep.rom 20 "PATTERN 0a
@usage ( -- )
	#01 ;/str !die
@toolong/str "fatal: 20 "pattern 20 "must 20 "be 20 "<16 20 "bytes. 0a
@toolong ( -- )
	#02 ;/str ( >> )
@die ( code str* -- )
	&loop
		LDAk DUP #19 DEO	( get and write char )
		STH INC2 STHr		( increment str* )
		#0a NEQ ?/loop		( loop if char != 0a )
	POP2 #0f DEO BRK		( exit with status XX )


( --- check for a match --- )

@bitlap/in ( state* `match -> state* `match )
	( handle the line buffer )
	#0a12 DEI DUP		( get char, save a byte later )
	[ LIT2 &ptr =/line ]	( line pointer )
	STH2k STA STH2r		( save byte in buffer )
	INC2 ,/ptr STR2		( save new pointer )
	EQU ?/newline		( reset state on newline )

	( shift-or )
	#10 SFT2		( SHIFT in a 0 )
	lut/getaddr LDA2 ORA2	( OR with LUT value )
	
	( matches are stored in the high bit of the top byte of the
	  return stack. if the high bit is low [i.e. we got a match],
	  this will set match to 00, otherwise it will stay 01. )
	DUP2 [ LIT &sft $1 ] SFT2
	POP STH ANDr BRK

@bitlap/newline ( state* `match -> state* `match )
	;/line ,/ptr STR2	( reset line ptr )
	POP2 #ffff		( reset state )
	STHr [ LITr 80 ]	( reset match )
	?{			( we saw a match, print the line )
		;/line
		&loop
			LDAk DUP #18 DEO	( get and write char )
			STH INC2 STHr		( increment str* )
			#0a NEQ ?/loop		( loop if char != 0a )
		POP2
	} BRK


( --- look-up table --- )

@lut/in ( -> )
	( first, compute the LUT bit )
	#0001			( gets shifted later )
	[ LIT2 &len 0000 ]	( how far into the pattern we are )
	STH2k			( save a byte later )
	SWP ?toolong		( die, pattern is > 16 bytes )
	SFT2 #ffff EOR2		( nth bit is 0, all others are 1 )
	
	( next, write it into the LUT )
	/getaddr
	STH2k LDA2 AND2		( compute new value for slot )
	STH2r STA2		( write new value )
	
	( last, update the counter )
	STH2r #0010 ADD2	( calculate new counter value )
	,/len STR2		( save it )
	BRK

@lut/getaddr ( -- addr* )
	#0012 DEI		( input byte, prefixed with 00 )
	#10 SFT2 ;lut ADD2	( offset in lookup table )
	JMP2r


@lut ( ... ) $200 ( ... ) @lut/end
@bitlap/line ( the incoming line gets written here )