Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF051.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import re
import regex

# Errors: the first argument is a non-raw string or bytes literal
re.compile('single free-spacing', flags=re.X)
re.findall('si\ngle')
re.finditer("dou\ble")
re.fullmatch('''t\riple single''')
re.match("""\triple double""")
re.search('two', 'args')
re.split("raw", r'second')
re.sub(u'''nicode''', u"f(?i)rst")
re.subn(b"""ytes are""", f"\u006e")

regex.compile('single free-spacing', flags=regex.X)
regex.findall('si\ngle')
regex.finditer("dou\ble")
regex.fullmatch('''t\riple single''')
regex.match("""\triple double""")
regex.search('two', 'args')
regex.split("raw", r'second')
regex.sub(u'''nicode''', u"f(?i)rst")
regex.subn(b"""ytes are""", f"\u006e")

regex.template("""(?m)
(?:ulti)?
(?=(?<!(?<=(?!l)))
l(?i:ne)
""", flags = regex.X)


# No errors: the first argument is raw, implicitly concatenated, an f-string, not a string/bytes literal, or missing
re.compile(R'uppercase')
re.findall(not_literal)
re.finditer(0, literal_but_not_string)
re.fullmatch() # no first argument
re.match('string' f'''concatenation''')
re.search(R"raw" r'concatenation')
re.split(rf"multiple", f"""lags""")
re.sub(FR'ee', '''as in free speech''')
re.subn(br"""eak your machine with rm -""", rf"""/""")

regex.compile(R'uppercase')
regex.findall(not_literal)
regex.finditer(0, literal_but_not_string)
regex.fullmatch() # no first argument
regex.match('string' f'''concatenation''')
regex.search(R"raw" r'concatenation')
regex.split(rf"multiple", f"""lags""")
regex.sub(FR'ee', '''as in free speech''')
regex.subn(br"""eak your machine with rm -""", rf"""/""")

regex.splititer(both, non_literal)
regex.subf(f, lambda _: r'means', '"format"')
regex.subfn(fn, f'''a$1n't''', lambda: "'function'")
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::MapIntVersionParsing) {
ruff::rules::map_int_version_parsing(checker, call);
}
if checker.enabled(Rule::UnrawRePattern) {
ruff::rules::unraw_re_pattern(checker, call);
}
}
Expr::Dict(dict) => {
if checker.any_enabled(&[
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "036") => (RuleGroup::Preview, rules::ruff::rules::NoneNotAtEndOfUnion),
(Ruff, "038") => (RuleGroup::Preview, rules::ruff::rules::RedundantBoolLiteral),
(Ruff, "048") => (RuleGroup::Preview, rules::ruff::rules::MapIntVersionParsing),
(Ruff, "051") => (RuleGroup::Preview, rules::ruff::rules::UnrawRePattern),
(Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA),
(Ruff, "101") => (RuleGroup::Stable, rules::ruff::rules::RedirectedNOQA),

Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ mod tests {
#[test_case(Rule::MutableDataclassDefault, Path::new("RUF008_attrs.py"))]
#[test_case(Rule::MapIntVersionParsing, Path::new("RUF048.py"))]
#[test_case(Rule::MapIntVersionParsing, Path::new("RUF048_1.py"))]
#[test_case(Rule::UnrawRePattern, Path::new("RUF051.py"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub(crate) use static_key_dict_comprehension::*;
pub(crate) use test_rules::*;
pub(crate) use unnecessary_iterable_allocation_for_first_element::*;
pub(crate) use unnecessary_key_check::*;
pub(crate) use unraw_re_pattern::*;
pub(crate) use unsafe_markup_use::*;
pub(crate) use unused_async::*;
pub(crate) use unused_noqa::*;
Expand Down Expand Up @@ -74,6 +75,7 @@ mod suppression_comment_visitor;
pub(crate) mod test_rules;
mod unnecessary_iterable_allocation_for_first_element;
mod unnecessary_key_check;
mod unraw_re_pattern;
mod unsafe_markup_use;
mod unused_async;
mod unused_noqa;
Expand Down
176 changes: 176 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral};
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::{Ranged, TextRange};
use std::fmt::{Display, Formatter};

use crate::checkers::ast::Checker;

/// ## What it does
/// Reports the following `re` and `regex` calls when their first
/// argument is a string or bytes literal that is not raw:
///
/// - Both modules: `compile`, `findall`, `finditer`,
///   `fullmatch`, `match`, `search`, `split`, `sub`, `subn`.
/// - `regex`-specific: `splititer`, `subf`, `subfn`, `template`.
///
/// Implicitly concatenated literals are not reported.
///
/// ## Why is this bad?
/// Regular expressions should be written using raw strings
/// (or raw bytes literals) to avoid double escaping.
///
/// ## Example
///
/// ```python
/// re.compile("foo\\bar")
/// ```
///
/// Use instead:
///
/// ```python
/// re.compile(r"foo\bar")
/// ```
#[violation]
pub struct UnrawRePattern {
// Module the flagged function was resolved to (`re` or `regex`).
module: RegexModule,
// Name of the flagged function within that module, e.g. `compile`.
func: String,
// Whether the offending first argument is a string or a bytes literal.
kind: PatternKind,
}

// Message and fix-title text for `UnrawRePattern`.
impl Violation for UnrawRePattern {
// Builds the diagnostic message: it names the offending call and states
// whether a raw string or a raw bytes literal was expected.
#[derive_message_formats]
fn message(&self) -> String {
let Self { module, func, kind } = &self;
// Rendered in backticks as `<module>.<func>()`, e.g. `re.compile()`.
let call = format!("`{module}.{func}()`");

// NOTE(review): `#[derive_message_formats]` presumably inspects the
// `format!` calls in this body -- confirm before restructuring it.
match kind {
PatternKind::String => format!("First argument to {call} is not raw string"),
PatternKind::Bytes => format!("First argument to {call} is not raw bytes literal"),
}
}

// Title of the suggested change; always `Some`, worded per literal kind.
fn fix_title(&self) -> Option<String> {
match self.kind {
PatternKind::String => Some("Replace with raw string".to_string()),
PatternKind::Bytes => Some("Replace with raw bytes literal".to_string()),
}
}
}

/// The module a flagged regular-expression call was resolved to.
#[derive(Debug, Eq, PartialEq)]
enum RegexModule {
    /// The `re` module.
    Re,
    /// The `regex` module.
    Regex,
}

impl RegexModule {
    /// Returns `true` only for [`RegexModule::Regex`].
    fn is_regex(&self) -> bool {
        *self == Self::Regex
    }
}

impl Display for RegexModule {
    /// Writes the module's name (`re` or `regex`) exactly as it is spelled in Python source.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::Re => "re",
            Self::Regex => "regex",
        };

        f.write_str(name)
    }
}

/// The kind of literal passed as the first argument of a flagged call.
#[derive(Debug, Eq, PartialEq)]
enum PatternKind {
/// A string literal (`Expr::StringLiteral`).
String,
/// A bytes literal (`Expr::BytesLiteral`).
Bytes,
}

/// RUF051
///
/// Pushes an [`UnrawRePattern`] diagnostic onto `checker.diagnostics` when `call` resolves
/// to one of the tracked `re`/`regex` functions and the first entry of `call.arguments.args`
/// is a non-raw, non-concatenated string or bytes literal. The diagnostic covers the range
/// of that argument.
pub(crate) fn unraw_re_pattern(checker: &mut Checker, call: &ExprCall) {
    let semantic = checker.semantic();

    // Nothing to check unless at least one of the two modules has been seen.
    let regex_module_seen =
        semantic.seen_module(Modules::RE) || semantic.seen_module(Modules::REGEX);
    if !regex_module_seen {
        return;
    }

    // Which module and function is being called, if it is one this rule tracks?
    let Some((module, func)) = regex_module_and_func(semantic, call.func.as_ref()) else {
        return;
    };

    // Is the first argument a literal that should have been raw?
    let Some((kind, range)) = pattern_kind_and_range(call.arguments.args.as_ref()) else {
        return;
    };

    let violation = UnrawRePattern { module, func, kind };
    checker.diagnostics.push(Diagnostic::new(violation, range));
}

/// Resolves `expr` to a `(module, function name)` pair when it names a tracked function.
///
/// Returns `None` when the qualified name cannot be resolved, when it does not consist of
/// exactly two segments, when the first segment is neither `re` nor `regex`, or when the
/// function is not tracked for that module.
fn regex_module_and_func(semantic: &SemanticModel, expr: &Expr) -> Option<(RegexModule, String)> {
    let qualified_name = semantic.resolve_qualified_name(expr)?;

    // Only plain `<module>.<func>` names are of interest.
    let &[module_name, func] = qualified_name.segments() else {
        return None;
    };

    let module = match module_name {
        "re" => RegexModule::Re,
        "regex" => RegexModule::Regex,
        _ => return None,
    };

    // The shared functions apply to both modules; the extensions only to `regex`.
    let is_tracked = is_shared(func) || (module.is_regex() && is_regex_specific(func));

    is_tracked.then(|| (module, func.to_string()))
}

/// Classifies the first of `arguments` and returns its kind together with its range.
///
/// Returns `None` when there is no first argument, when it is neither a string nor a bytes
/// literal, or when the literal is implicitly concatenated or already raw.
fn pattern_kind_and_range(arguments: &[Expr]) -> Option<(PatternKind, TextRange)> {
    let pattern = arguments.first()?;

    let (kind, should_report) = match pattern {
        Expr::StringLiteral(ExprStringLiteral { value, .. }) => (
            PatternKind::String,
            !(value.is_implicit_concatenated() || value.is_raw()),
        ),
        Expr::BytesLiteral(ExprBytesLiteral { value, .. }) => (
            PatternKind::Bytes,
            !(value.is_implicit_concatenated() || value.is_raw()),
        ),
        _ => return None,
    };

    should_report.then(|| (kind, pattern.range()))
}

/// Returns `true` when `func` is one of the function names tracked for both `re` and `regex`.
fn is_shared(func: &str) -> bool {
    const SHARED: [&str; 9] = [
        "compile",
        "findall",
        "finditer",
        "fullmatch",
        "match",
        "search",
        "split",
        "sub",
        "subn",
    ];

    SHARED.contains(&func)
}

/// Returns `true` when `func` is one of the function names tracked only for `regex`.
fn is_regex_specific(func: &str) -> bool {
    const REGEX_ONLY: [&str; 4] = ["splititer", "subf", "subfn", "template"];

    REGEX_ONLY.contains(&func)
}
Loading
Loading