Skip to content

Commit 1c03f8f

Browse files
committed
More regexp.
1 parent 363b12e commit 1c03f8f

File tree

2 files changed

+86
-45
lines changed

2 files changed

+86
-45
lines changed

ext/regexp/regexp.go

Lines changed: 85 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,14 @@ func Register(db *sqlite3.Conn) error {
3232
db.CreateFunction("regexp_instr", 3, flags, regexInstr),
3333
db.CreateFunction("regexp_instr", 4, flags, regexInstr),
3434
db.CreateFunction("regexp_instr", 5, flags, regexInstr),
35+
db.CreateFunction("regexp_instr", 6, flags, regexInstr),
3536
db.CreateFunction("regexp_substr", 2, flags, regexSubstr),
3637
db.CreateFunction("regexp_substr", 3, flags, regexSubstr),
3738
db.CreateFunction("regexp_substr", 4, flags, regexSubstr),
39+
db.CreateFunction("regexp_substr", 5, flags, regexSubstr),
3840
db.CreateFunction("regexp_replace", 3, flags, regexReplace),
39-
db.CreateFunction("regexp_replace", 4, flags, regexReplace))
41+
db.CreateFunction("regexp_replace", 4, flags, regexReplace),
42+
db.CreateFunction("regexp_replace", 5, flags, regexReplace))
4043
}
4144

4245
func load(ctx sqlite3.Context, i int, expr string) (*regexp.Regexp, error) {
@@ -68,6 +71,7 @@ func regexLike(ctx sqlite3.Context, arg ...sqlite3.Value) {
6871
ctx.ResultError(err)
6972
return // notest
7073
}
74+
7175
text := arg[0].RawText()
7276
ctx.ResultBool(re.Match(text))
7377
}
@@ -78,10 +82,11 @@ func regexCount(ctx sqlite3.Context, arg ...sqlite3.Value) {
7882
ctx.ResultError(err)
7983
return // notest
8084
}
85+
8186
text := arg[0].RawText()
8287
if len(arg) > 2 {
8388
pos := arg[2].Int()
84-
_, text = split(text, pos)
89+
text = text[skip(text, pos):]
8590
}
8691
ctx.ResultInt(len(re.FindAll(text, -1)))
8792
}
@@ -92,26 +97,23 @@ func regexSubstr(ctx sqlite3.Context, arg ...sqlite3.Value) {
9297
ctx.ResultError(err)
9398
return // notest
9499
}
100+
95101
text := arg[0].RawText()
102+
var pos, n, subexpr int
96103
if len(arg) > 2 {
97-
pos := arg[2].Int()
98-
_, text = split(text, pos)
104+
pos = arg[2].Int()
99105
}
100-
n := 0
101106
if len(arg) > 3 {
102107
n = arg[3].Int()
103108
}
109+
if len(arg) > 4 {
110+
subexpr = arg[4].Int()
111+
}
104112

105-
var res []byte
106-
if n <= 1 {
107-
res = re.Find(text)
108-
} else {
109-
all := re.FindAll(text, n)
110-
if n <= len(all) {
111-
res = all[n-1]
112-
}
113+
loc := regexFind(re, text, pos, n, subexpr)
114+
if loc != nil {
115+
ctx.ResultRawText(text[loc[0]:loc[1]])
113116
}
114-
ctx.ResultRawText(res)
115117
}
116118

117119
func regexInstr(ctx sqlite3.Context, arg ...sqlite3.Value) {
@@ -120,35 +122,26 @@ func regexInstr(ctx sqlite3.Context, arg ...sqlite3.Value) {
120122
ctx.ResultError(err)
121123
return // notest
122124
}
123-
pos := 1
125+
124126
text := arg[0].RawText()
127+
var pos, n, end, subexpr int
125128
if len(arg) > 2 {
126129
pos = arg[2].Int()
127-
_, text = split(text, pos)
128130
}
129-
n := 0
130131
if len(arg) > 3 {
131132
n = arg[3].Int()
132133
}
133-
134-
var loc []int
135-
if n <= 1 {
136-
loc = re.FindIndex(text)
137-
} else {
138-
all := re.FindAllIndex(text, n)
139-
if n <= len(all) {
140-
loc = all[n-1]
141-
}
134+
if len(arg) > 4 && arg[4].Bool() {
135+
end = 1
142136
}
143-
if loc == nil {
144-
return
137+
if len(arg) > 5 {
138+
subexpr = arg[4].Int()
145139
}
146140

147-
end := 0
148-
if len(arg) > 4 && arg[4].Bool() {
149-
end = 1
141+
loc := regexFind(re, text, pos, n, subexpr)
142+
if loc != nil {
143+
ctx.ResultInt(loc[end] + 1)
150144
}
151-
ctx.ResultInt(pos + loc[end])
152145
}
153146

154147
func regexReplace(ctx sqlite3.Context, arg ...sqlite3.Value) {
@@ -157,24 +150,71 @@ func regexReplace(ctx sqlite3.Context, arg ...sqlite3.Value) {
157150
ctx.ResultError(err)
158151
return // notest
159152
}
160-
var head, tail []byte
161-
tail = arg[0].RawText()
153+
154+
text := arg[0].RawText()
155+
repl := arg[2].RawText()
156+
var pos, n int
162157
if len(arg) > 3 {
163-
pos := arg[3].Int()
164-
head, tail = split(tail, pos)
158+
pos = arg[3].Int()
159+
}
160+
if len(arg) > 4 {
161+
n = arg[4].Int()
162+
}
163+
164+
res := text
165+
pos = skip(text, pos)
166+
if n > 0 {
167+
all := re.FindAllSubmatchIndex(text[pos:], n)
168+
if n <= len(all) {
169+
loc := all[n-1]
170+
res = text[:pos+loc[0]]
171+
res = re.Expand(res, repl, text[pos:], loc)
172+
res = append(res, text[pos+loc[1]:]...)
173+
}
174+
} else {
175+
res = append(text[:pos], re.ReplaceAll(text[pos:], repl)...)
165176
}
166-
tail = re.ReplaceAll(tail, arg[2].RawText())
167-
if head != nil {
168-
tail = append(head, tail...)
177+
ctx.ResultRawText(res)
178+
}
179+
180+
func regexFind(re *regexp.Regexp, text []byte, pos, n, subexpr int) (loc []int) {
181+
pos = skip(text, pos)
182+
text = text[pos:]
183+
184+
if n <= 1 {
185+
if subexpr == 0 {
186+
loc = re.FindIndex(text)
187+
} else {
188+
loc = re.FindSubmatchIndex(text)
189+
}
190+
} else {
191+
if subexpr == 0 {
192+
all := re.FindAllIndex(text, n)
193+
if n <= len(all) {
194+
loc = all[n-1]
195+
}
196+
} else {
197+
all := re.FindAllSubmatchIndex(text, n)
198+
if n <= len(all) {
199+
loc = all[n-1]
200+
}
201+
}
202+
}
203+
204+
if 2+2*subexpr <= len(loc) {
205+
loc = loc[2*subexpr : 2+2*subexpr]
206+
loc[0] += pos
207+
loc[1] += pos
208+
return loc
169209
}
170-
ctx.ResultRawText(tail)
210+
return nil
171211
}
172212

173-
func split(s []byte, i int) (head, tail []byte) {
174-
for pos := range string(s) {
175-
if i--; i <= 0 {
176-
return s[:pos:pos], s[pos:]
213+
// skip converts a 1-based character position into a byte offset in text.
//
// It walks text rune by rune and returns the byte offset of the start-th
// rune. A start of 1 or less yields 0 for non-empty text. When text has
// fewer than start runes, including when it is empty, it returns len(text).
func skip(text []byte, start int) int {
	remaining := start
	for offset := range string(text) {
		remaining--
		if remaining <= 0 {
			return offset
		}
	}
	return len(text)
}

ext/regexp/regexp_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ func TestRegister(t *testing.T) {
3636
{`regexp_substr('Hello', 'el.')`, "ell"},
3737
{`regexp_substr('Hello', 'l', 2, 2)`, "l"},
3838
{`regexp_replace('Hello', 'llo', 'll')`, "Hell"},
39+
{`regexp_replace('Hello', 'llo', 'll', 1, 1)`, "Hell"},
3940

4041
{`regexp_count('123123123123123', '(12)3', 1)`, "5"},
4142
{`regexp_instr('500 Oracle Parkway, Redwood Shores, CA', '(?i)[s|r|p][[:alpha:]]{6}', 3, 2, 1)`, "28"},

0 commit comments

Comments
 (0)