Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
| [regexp/no-useless-assertions](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-assertions.html) | disallow assertions that are known to always accept (or reject) | |
| [regexp/no-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-backreference.html) | disallow useless backreferences in regular expressions | :star: |
| [regexp/no-useless-dollar-replacements](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-dollar-replacements.html) | disallow useless `$` replacements in replacement string | |
| [regexp/strict](https://ota-meshi.github.io/eslint-plugin-regexp/rules/strict.html) | disallow not strictly valid regular expressions | |
| [regexp/strict](https://ota-meshi.github.io/eslint-plugin-regexp/rules/strict.html) | disallow not strictly valid regular expressions | :wrench: |

### Best Practices

Expand Down
2 changes: 1 addition & 1 deletion docs/rules/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
| [regexp/no-useless-assertions](./no-useless-assertions.md) | disallow assertions that are known to always accept (or reject) | |
| [regexp/no-useless-backreference](./no-useless-backreference.md) | disallow useless backreferences in regular expressions | :star: |
| [regexp/no-useless-dollar-replacements](./no-useless-dollar-replacements.md) | disallow useless `$` replacements in replacement string | |
| [regexp/strict](./strict.md) | disallow not strictly valid regular expressions | |
| [regexp/strict](./strict.md) | disallow not strictly valid regular expressions | :wrench: |

### Best Practices

Expand Down
3 changes: 2 additions & 1 deletion docs/rules/strict.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ description: "disallow not strictly valid regular expressions"
> disallow not strictly valid regular expressions

- :exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
- :wrench: The `--fix` option on the [command line](https://eslint.org/docs/user-guide/command-line-interface#fixing-problems) can automatically fix some of the problems reported by this rule.

## :book: Rule Details

Expand All @@ -20,7 +21,7 @@ Depending on the syntax defined in [Annex B] of the ECMAScript specification, so

[Annex B]: https://tc39.es/ecma262/#sec-regular-expressions-patterns

<eslint-code-block>
<eslint-code-block fix>

```js
/* eslint regexp/strict: "error" */
Expand Down
231 changes: 221 additions & 10 deletions lib/rules/strict.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
import { RegExpValidator } from "regexpp"
import type { CharacterClassElement, Element } from "regexpp/ast"
import type { RegExpVisitor } from "regexpp/visitor"
import type { RegExpContext } from "../utils"
import { createRule, defineRegexpVisitor } from "../utils"
import {
isOctalEscape,
createRule,
defineRegexpVisitor,
isEscapeSequence,
} from "../utils"

const validator = new RegExpValidator({ strict: true })
const validator = new RegExpValidator({ strict: true, ecmaVersion: 2020 })

/**
* Check syntax error in a given pattern.
* @returns {string|null} The syntax error.
* @returns The syntax error.
*/
function validateRegExpPattern(pattern: string, uFlag?: boolean) {
function validateRegExpPattern(
pattern: string,
uFlag?: boolean,
): string | null {
try {
validator.validatePattern(pattern, undefined, undefined, uFlag)
return null
Expand All @@ -18,6 +27,9 @@ function validateRegExpPattern(pattern: string, uFlag?: boolean) {
}
}

const CHARACTER_CLASS_SYNTAX_CHARACTERS = new Set("\\/()[]{}^$.|-+*?".split(""))
const SYNTAX_CHARACTERS = new Set("\\/()[]{}^$.|+*?".split(""))

export default createRule("strict", {
meta: {
docs: {
Expand All @@ -27,8 +39,33 @@ export default createRule("strict", {
// recommended: true,
recommended: false,
},
fixable: "code",
schema: [],
messages: {
// character escape
invalidControlEscape:
"Invalid or incomplete control escape sequence. Either use a valid control escape sequence or escaping the standalone backslash.",
incompleteEscapeSequence:
"Incomplete escape sequence '{{expr}}'. Either use a valid escape sequence or remove the useless escaping.",
invalidPropertyEscape:
"Invalid property escape sequence '{{expr}}'. Either use a valid property escape sequence or remove the useless escaping.",
incompleteBackreference:
"Incomplete backreference '{{expr}}'. Either use a valid backreference or remove the useless escaping.",
unescapedSourceCharacter: "Unescaped source character '{{expr}}'.",
octalEscape:
"Invalid legacy octal escape sequence '{{expr}}'. Use a hexadecimal escape instead.",
uselessEscape:
"Useless identity escapes with non-syntax characters are forbidden.",

// character class
invalidRange:
"Invalid character class range. A character set cannot be the minimum or maximum of a character class range. Either escape the `-` or fix the character class range.",

// assertion
quantifiedAssertion:
"Assertion are not allowed to be quantified directly.",

// validator
regexMessage: "{{message}}.",
},
type: "suggestion",
Expand All @@ -40,21 +77,195 @@ export default createRule("strict", {
function createVisitor(
regexpContext: RegExpContext,
): RegExpVisitor.Handlers {
const { node, flags, pattern } = regexpContext
const {
node,
flags,
pattern,
getRegexpLocation,
fixReplaceNode,
} = regexpContext

if (flags.unicode) {
// the Unicode flag enables strict parsing mode automatically
return {}
}

const message = validateRegExpPattern(pattern, flags.unicode)
let reported = false
let hasNamedBackreference = false

/** Report */
function report(
messageId: string,
element: Element,
fix?: string | null,
): void {
reported = true

if (message) {
context.report({
node,
messageId: "regexMessage",
loc: getRegexpLocation(element),
messageId,
data: {
message,
expr: element.raw,
},
fix: fix ? fixReplaceNode(element, fix) : null,
})
}

return {}
return {
// eslint-disable-next-line complexity -- x
onCharacterEnter(cNode) {
if (cNode.raw === "\\") {
// e.g. \c5 or \c
report("invalidControlEscape", cNode)
return
}
if (cNode.raw === "\\u" || cNode.raw === "\\x") {
// e.g. \u000;
report("incompleteEscapeSequence", cNode)
return
}
if (cNode.raw === "\\p" || cNode.raw === "\\P") {
// e.g. \p{H} or \p
report("invalidPropertyEscape", cNode)
return
}
if (cNode.value !== 0 && isOctalEscape(cNode.raw)) {
// e.g. \023
report(
"octalEscape",
cNode,
`\\x${cNode.value.toString(16).padStart(2, "0")}`,
)
return
}

const insideCharClass =
cNode.parent.type === "CharacterClass" ||
cNode.parent.type === "CharacterClassRange"

if (!insideCharClass) {
if (cNode.raw === "\\k") {
// e.g. \k<foo or \k
report("incompleteBackreference", cNode)
return
}

if (
cNode.raw === "{" ||
cNode.raw === "}" ||
cNode.raw === "]"
) {
report(
"unescapedSourceCharacter",
cNode,
`\\${cNode.raw}`,
)
return
}
}

if (isEscapeSequence(cNode.raw)) {
// all remaining escape sequences are valid
return
}

if (cNode.raw.startsWith("\\")) {
const identity = cNode.raw.slice(1)
const syntaxChars = insideCharClass
? CHARACTER_CLASS_SYNTAX_CHARACTERS
: SYNTAX_CHARACTERS

if (
cNode.value === identity.charCodeAt(0) &&
!syntaxChars.has(identity)
) {
// e.g. \g or \;
report("uselessEscape", cNode, identity)
}
}
},
onCharacterClassEnter(ccNode) {
for (let i = 0; i < ccNode.elements.length; i++) {
const current = ccNode.elements[i]

if (current.type === "CharacterSet") {
const next: CharacterClassElement | undefined =
ccNode.elements[i + 1]
const nextNext: CharacterClassElement | undefined =
ccNode.elements[i + 2]

if (next && next.raw === "-" && nextNext) {
// e.g. [\w-a]
report("invalidRange", current)
return
}

const prev: CharacterClassElement | undefined =
ccNode.elements[i - 1]
const prevPrev: CharacterClassElement | undefined =
ccNode.elements[i - 2]
if (
prev &&
prev.raw === "-" &&
prevPrev &&
prevPrev.type !== "CharacterClassRange"
) {
// e.g. [a-\w]
report("invalidRange", current)
return
}
}
}
},
onQuantifierEnter(qNode) {
if (qNode.element.type === "Assertion") {
// e.g. \b+
report(
"quantifiedAssertion",
qNode,
`(?:${qNode.element.raw})${qNode.raw.slice(
qNode.element.end - qNode.start,
)}`,
)
}
},

onBackreferenceEnter(bNode) {
if (typeof bNode.ref === "string") {
hasNamedBackreference = true
}
},
onPatternLeave() {
if (hasNamedBackreference) {
// There is a bug in regexpp that causes it throw a
// syntax error for all non-Unicode regexes with named
// backreferences.
// TODO: Remove this workaround when the bug is fixed.
return
}

if (!reported) {
// our own logic couldn't find any problems,
// so let's use a real parser to do the job.

const message = validateRegExpPattern(
pattern,
flags.unicode,
)

if (message) {
context.report({
node,
messageId: "regexMessage",
data: {
message,
},
})
}
}
},
}
}

return defineRegexpVisitor(context, {
Expand Down
Loading