@@ -28,6 +28,13 @@ extern "C" {
2828 */
2929typedef struct rure rure ;
3030
31+ /*
32+ * rure_set is the type of a set of compiled regular expressions.
33+ *
34+ * A rure_set can be safely used from multiple threads simultaneously.
35+ */
36+ typedef struct rure_set rure_set ;
37+
3138/*
3239 * rure_options is the set of non-flag configuration options for compiling
3340 * a regular expression. Currently, only two options are available: setting
@@ -165,7 +172,7 @@ rure *rure_compile(const uint8_t *pattern, size_t length,
165172/*
166173 * rure_free frees the given compiled regular expression.
167174 *
168- * This must be called at most once.
175+ * This must be called at most once for any rure.
169176 */
170177void rure_free (rure * re );
171178
@@ -446,6 +453,90 @@ void rure_options_size_limit(rure_options *options, size_t limit);
446453 */
447454void rure_options_dfa_size_limit (rure_options * options , size_t limit );
448455
456+ /*
457+ * rure_compile_set compiles the given list of patterns into a single regular
458+ * expression which can be matched in a linear scan. Each pattern in patterns
459+ * must be valid UTF-8 and the length of each pattern in patterns corresponds
460+ * to a byte length in patterns_lengths.
461+ *
462+ * The number of patterns to compile is specified by patterns_count. patterns
463+ * must contain at least this many entries.
464+ *
465+ * flags is a bitfield. Valid values are constants declared with prefix
466+ * RURE_FLAG_.
467+ *
468+ * options contains non-flag configuration settings. If it's NULL, default
469+ * settings are used. options may be freed immediately after a call to
470+ * rure_compile_set.
471+ *
472+ * error is set if there was a problem compiling any of the patterns.
473+ *
474+ * The compiled expression set returned may be used from multiple threads.
475+ */
476+ rure_set * rure_compile_set (const uint8_t * * patterns ,
477+ const size_t * patterns_lengths ,
478+ size_t patterns_count ,
479+ uint32_t flags ,
480+ rure_options * options ,
481+ rure_error * error );
482+
483+ /*
484+ * rure_set_free frees the given compiled regular expression set.
485+ *
486+ * This must be called at most once for any rure_set.
487+ */
488+ void rure_set_free (rure_set * re );
489+
490+ /*
491+ * rure_set_is_match returns true if and only if any regex within the set
492+ * matches anywhere in the haystack. Once a match has been located, the
493+ * matching engine will quit immediately.
494+ *
495+ * haystack may contain arbitrary bytes, but ASCII compatible text is more
496+ * useful. UTF-8 is even more useful. Other text encodings aren't supported.
497+ * length should be the number of bytes in haystack.
498+ *
499+ * start is the position at which to start searching. Note that setting the
500+ * start position is distinct from incrementing the pointer, since the regex
501+ * engine may look at bytes before the start position to determine match
502+ * information. For example, if the start position is greater than 0, then the
503+ * \A ("begin text") anchor can never match.
504+ */
505+ bool rure_set_is_match (rure_set * re , const uint8_t * haystack , size_t length ,
506+ size_t start );
507+
508+ /*
509+ * rure_set_matches compares each regex in the set against the haystack and
510+ * writes the match result of each pattern into matches. Match results are
511+ * in the same order as the patterns given when the rure_set was compiled. For
512+ * example, index 0 of matches corresponds to the first pattern passed to
513+ * rure_compile_set.
514+ *
515+ * haystack may contain arbitrary bytes, but ASCII compatible text is more
516+ * useful. UTF-8 is even more useful. Other text encodings aren't supported.
517+ * length should be the number of bytes in haystack.
518+ *
519+ * start is the position at which to start searching. Note that setting the
520+ * start position is distinct from incrementing the pointer, since the regex
521+ * engine may look at bytes before the start position to determine match
522+ * information. For example, if the start position is greater than 0, then the
523+ * \A ("begin text") anchor can never match.
524+ *
525+ * matches must point to an array whose length is greater than or equal to
526+ * the number of patterns the rure_set was compiled with.
527+ *
528+ * Only use this function if you specifically need to know which regexes
529+ * matched within the set. To determine if any of the regexes matched without
530+ * caring which, use rure_set_is_match.
531+ */
532+ bool rure_set_matches (rure_set * re , const uint8_t * haystack , size_t length ,
533+ size_t start , bool * matches );
534+
535+ /*
536+ * rure_set_len returns the number of patterns the given rure_set was compiled with.
537+ */
538+ size_t rure_set_len (rure_set * re );
539+
449540/*
450541 * rure_error_new allocates space for an error.
451542 *
0 commit comments