@@ -273,6 +273,37 @@ func ConvertInt64(value string, lst *sdcpb.SchemaLeafType) (*sdcpb.TypedValue, e
273273 return convertInt (value , lst .Range , ranges )
274274}
275275
// XMLRegexConvert rewrites an XML Schema style regular expression so that the
// characters '^' and '$' keep their XML meaning when compiled by Go's regexp
// package.
//
// In XML Schema patterns '^' and '$' are ordinary characters (the only special
// use of '^' is as the negation marker directly after the '[' that opens a
// character class), whereas Go treats them as anchors. The function therefore
// prefixes a backslash to:
//   - every '$' that is not already escaped, and
//   - every '^' that is not already escaped and does not directly follow an
//     unescaped '['.
//
// A character counts as "already escaped" when it is preceded by an odd number
// of consecutive backslashes. All other characters are copied unchanged.
func XMLRegexConvert(s string) string {
	var b strings.Builder
	// Reserve some headroom for the backslashes that may be inserted.
	b.Grow(len(s) + len(s)/4)

	// slashes counts the run of consecutive backslashes immediately before the
	// current rune; an odd count means the current rune is escaped.
	slashes := 0
	// afterOpenBracket is true only when the previous rune was an unescaped '[',
	// i.e. a '^' at this position is a character class negation. An escaped
	// "\[" is a literal bracket and must not suppress the escaping of '^'.
	afterOpenBracket := false

	for _, r := range s {
		if r == '\\' {
			slashes++
			afterOpenBracket = false
			b.WriteRune(r)
			continue
		}

		escaped := slashes%2 == 1
		if !escaped && (r == '$' || (r == '^' && !afterOpenBracket)) {
			b.WriteRune('\\')
		}

		afterOpenBracket = r == '[' && !escaped
		slashes = 0
		b.WriteRune(r)
	}
	return b.String()
}
306+
276307func ConvertString (value string , lst * sdcpb.SchemaLeafType ) (* sdcpb.TypedValue , error ) {
277308 // check length of the string if the length property is set
278309 // length will contain a range like string definition "5..60" or "7..10|40..45"
@@ -289,7 +320,14 @@ func ConvertString(value string, lst *sdcpb.SchemaLeafType) (*sdcpb.TypedValue,
289320 // If the type has multiple "pattern" statements, the expressions are
290321 // ANDed together, i.e., all such expressions have to match.
291322 for _ , sp := range lst .Patterns {
292- re , err := regexp .Compile (sp .Pattern )
323+ // The set of metacharacters is not the same between XML schema and perl/python/go REs
324+ // the set of metacharacters for XML is: .\?*+{}()[] (https://www.w3.org/TR/xmlschema-2/#dt-metac)
325+ // the set of metacharacters defined in go is: \.+*?()|[]{}^$ (go/libexec/src/regexp/regexp.go:714)
326+ // we need therefore to escape some values
327+ // TODO check about '^'
328+
329+ escaped := XMLRegexConvert (sp .Pattern )
330+ re , err := regexp .Compile (escaped )
293331 if err != nil {
294332 log .Errorf ("unable to compile regex %q" , sp .Pattern )
295333 }
0 commit comments