@@ -273,6 +273,29 @@ func ConvertInt64(value string, lst *sdcpb.SchemaLeafType) (*sdcpb.TypedValue, e
273273 return convertInt (value , lst .Range , ranges )
274274}
275275
// EnsureEscaped returns s with a backslash inserted in front of every
// occurrence of a rune listed in toEscape that is not already escaped.
//
// A rune counts as already escaped when it is directly preceded by an odd
// number of consecutive backslashes; an even count (including zero) means the
// backslashes pair up with each other and the rune itself is still bare.
//
// Backslashes are only counted, never escaped, so listing '\\' in toEscape
// has no effect. All bytes of s other than the inserted backslashes are
// copied through unchanged, including bytes that are not valid UTF-8.
func EnsureEscaped(s string, toEscape string) string {
	// Fast path: nothing in s can need escaping, so avoid allocating.
	if !strings.ContainsAny(s, toEscape) {
		return s
	}

	var b strings.Builder
	b.Grow(len(s) + len(s)/4)

	start := 0   // beginning of the span of s not yet copied into b
	slashes := 0 // length of the backslash run immediately preceding r

	for i, r := range s {
		if r == '\\' {
			slashes++
			continue
		}

		if slashes%2 == 0 && strings.ContainsRune(toEscape, r) {
			// Copy the pending span byte-for-byte, then insert the escape
			// right before the target rune.
			b.WriteString(s[start:i])
			b.WriteByte('\\')
			start = i
		}
		slashes = 0
	}

	b.WriteString(s[start:])
	return b.String()
}
298+
276299func ConvertString (value string , lst * sdcpb.SchemaLeafType ) (* sdcpb.TypedValue , error ) {
277300 // check length of the string if the length property is set
278301 // length will contain a range like string definition "5..60" or "7..10|40..45"
@@ -289,7 +312,14 @@ func ConvertString(value string, lst *sdcpb.SchemaLeafType) (*sdcpb.TypedValue,
289312 // If the type has multiple "pattern" statements, the expressions are
290313 // ANDed together, i.e., all such expressions have to match.
291314 for _ , sp := range lst .Patterns {
292- re , err := regexp .Compile (sp .Pattern )
315+ // The set of metacharacters is not the same between XML schema and perl/python/go REs
316+ // the set of metacharacters for XML is: .\?*+{}()[] (https://www.w3.org/TR/xmlschema-2/#dt-metac)
317+ // the set of metacharacters defined in go is: \.+*?()|[]{}^$ (go/libexec/src/regexp/regexp.go:714)
318+ // we therefore need to escape the characters that are metacharacters only in go
319+ // TODO check about '^'
320+
321+ escaped := EnsureEscaped (sp .Pattern , "$" )
322+ re , err := regexp .Compile (escaped )
293323 if err != nil {
294324 log .Errorf ("unable to compile regex %q" , sp .Pattern )
295325 }
0 commit comments