Skip to content

Commit 8f609b6

Browse files
handle caret properly and add test cases
1 parent fb1b41d commit 8f609b6

File tree

2 files changed

+76
-5
lines changed

2 files changed

+76
-5
lines changed

pkg/utils/leaf_convert.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -273,24 +273,32 @@ func ConvertInt64(value string, lst *sdcpb.SchemaLeafType) (*sdcpb.TypedValue, e
273273
return convertInt(value, lst.Range, ranges)
274274
}
275275

276-
func EnsureEscaped(s string, toEscape string) string {
277-
isTarget := func(r rune) bool { return strings.ContainsRune(toEscape, r) }
276+
func XMLRegexConvert(s string) string {
277+
278+
cTest := func(r rune, prev rune) bool {
279+
// return true for '$', and for '^' unless it directly follows '['
280+
return (r == '^' && prev != '[') || r == '$'
281+
}
278282

279283
b := strings.Builder{}
280284
b.Grow(len(s) + len(s)/4)
281285
slashes := 0
286+
prevR := rune(0)
282287

283288
for _, r := range s {
284289
if r == '\\' {
285290
slashes++
291+
prevR = r
286292
b.WriteRune(r)
287293
continue
288294
}
289295

290-
if isTarget(r) && slashes%2 == 0 {
296+
if cTest(r, prevR) && slashes%2 == 0 {
291297
b.WriteRune('\\')
292298
}
299+
293300
slashes = 0
301+
prevR = r
294302
b.WriteRune(r)
295303
}
296304
return b.String()
@@ -317,8 +325,8 @@ func ConvertString(value string, lst *sdcpb.SchemaLeafType) (*sdcpb.TypedValue,
317325
// the set of metacharacters defined in go is: \.+*?()|[]{}^$ (go/libexec/src/regexp/regexp.go:714)
318326
// we therefore need to escape some of these characters
319327
// TODO '^' directly after an escaped bracket (\[^) is still not escaped by XMLRegexConvert
320-
321-
escaped := EnsureEscaped(sp.Pattern, "$")
328+
329+
escaped := XMLRegexConvert(sp.Pattern)
322330
re, err := regexp.Compile(escaped)
323331
if err != nil {
324332
log.Errorf("unable to compile regex %q", sp.Pattern)

pkg/utils/leaf_convert_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package utils
2+
3+
import (
4+
"reflect"
5+
"testing"
6+
)
7+
8+
func TestXMLRegexConvert(t *testing.T) {
9+
10+
tests := []struct {
11+
name string
12+
in string
13+
want string
14+
}{
15+
{
16+
name: "anchors become literals",
17+
in: `^\d+$`,
18+
want: `\^\d+\$`,
19+
},
20+
{
21+
name: "already-escaped anchors stay escaped",
22+
in: `foo\$bar`,
23+
want: `foo\$bar`,
24+
},
25+
{
26+
name: "caret in char class is left alone, dollar is escaped",
27+
in: `[^\w]+$`,
28+
want: `[^\w]+\$`,
29+
},
30+
{
31+
name: "caret later inside char class is escaped",
32+
in: `[a^b]`,
33+
want: `[a\^b]`,
34+
},
35+
{
36+
name: "caret escaped inside char class is escaped",
37+
in: `[\^]`,
38+
want: `[\^]`,
39+
},
40+
{
41+
name: "caret in char class multiple times, dollar is escaped",
42+
in: `[^a^b]`,
43+
want: `[^a\^b]`,
44+
},
45+
{
46+
name: "anchors preceded by a single back-slash stay escaped",
47+
in: `\^test\$`,
48+
want: `\^test\$`,
49+
},
50+
{
51+
name: "empty string",
52+
in: ``,
53+
want: ``,
54+
},
55+
}
56+
for _, tt := range tests {
57+
t.Run(tt.name, func(t *testing.T) {
58+
if got := XMLRegexConvert(tt.in); !reflect.DeepEqual(got, tt.want) {
59+
t.Errorf("XMLRegexConvert() = %v, want %v", got, tt.want)
60+
}
61+
})
62+
}
63+
}

0 commit comments

Comments
 (0)