Skip to content

Commit b89a2cd

Browse files
khietashaobo-he-aws
authored andcommitted
Fix formatter handling of blank lines (#870)
1 parent 9a1fc11 commit b89a2cd

File tree

6 files changed

+200
-51
lines changed

6 files changed

+200
-51
lines changed

cedar-policy-formatter/src/pprint/doc.rs

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ impl Doc for ASTNode<Option<VariableDef>> {
4747
var_doc
4848
.append(get_trailing_comment_doc_from_str(
4949
&start_comment.trailing_comment,
50+
RcDoc::nil(),
5051
))
5152
.append(RcDoc::line())
5253
.append(add_comment(
@@ -66,6 +67,7 @@ impl Doc for ASTNode<Option<VariableDef>> {
6667
.group()
6768
.append(get_trailing_comment_doc_from_str(
6869
&end_comment.trailing_comment,
70+
RcDoc::nil(),
6971
)),
7072
),
7173
None => add_comment(var_doc, start_comment, RcDoc::nil()),
@@ -91,21 +93,23 @@ impl Doc for ASTNode<Option<Cond>> {
9193
cond_doc
9294
.append(get_trailing_comment_doc_from_str(
9395
&cond_comment.trailing_comment,
96+
RcDoc::line(),
9497
))
95-
.append(RcDoc::line())
9698
.append(
9799
get_leading_comment_doc_from_str(&lb_comment.leading_comment).append(
98100
RcDoc::text("{").append(
99-
get_trailing_comment_doc_from_str(&lb_comment.trailing_comment)
100-
.append(RcDoc::line())
101-
.append(
102-
get_leading_comment_doc_from_str(&expr_leading_comment)
103-
.append(expr_doc.group()),
104-
)
105-
.nest(context.config.indent_width)
106-
.append(RcDoc::line())
107-
.append(rb_doc)
108-
.group(),
101+
get_trailing_comment_doc_from_str(
102+
&lb_comment.trailing_comment,
103+
RcDoc::line(),
104+
)
105+
.append(
106+
get_leading_comment_doc_from_str(&expr_leading_comment)
107+
.append(expr_doc.group()),
108+
)
109+
.nest(context.config.indent_width)
110+
.append(RcDoc::line())
111+
.append(rb_doc)
112+
.group(),
109113
),
110114
),
111115
)
@@ -116,15 +120,15 @@ impl Doc for ASTNode<Option<Cond>> {
116120
cond_doc
117121
.append(get_trailing_comment_doc_from_str(
118122
&cond_comment.trailing_comment,
123+
RcDoc::line(),
119124
))
120-
.append(RcDoc::line())
121125
.append(
122126
get_leading_comment_doc_from_str(&lb_comment.leading_comment).append(
123127
RcDoc::text("{")
124128
.append(get_trailing_comment_doc_from_str(
125129
&lb_comment.trailing_comment,
130+
RcDoc::line(),
126131
))
127-
.append(RcDoc::line())
128132
.append(rb_doc)
129133
.group(),
130134
),
@@ -140,12 +144,12 @@ impl Doc for ASTNode<Option<Expr>> {
140144
match self.as_inner()?.expr.as_ref() {
141145
ExprData::If(c, t, e) => {
142146
fn pp_group<'n>(
143-
s: &str,
147+
s: &'n str,
144148
c: Comment,
145149
e: &'n ASTNode<Option<Expr>>,
146150
context: &mut Context<'_>,
147151
) -> RcDoc<'n> {
148-
add_comment(RcDoc::as_string(s), c, RcDoc::nil()).append(
152+
add_comment(RcDoc::text(s), c, RcDoc::nil()).append(
149153
RcDoc::line()
150154
.append(e.to_doc(context))
151155
.nest(context.config.indent_width),
@@ -266,7 +270,7 @@ impl Doc for ASTNode<Option<Relation>> {
266270

267271
impl Doc for AddOp {
268272
fn to_doc(&self, _: &mut Context<'_>) -> Option<RcDoc<'_>> {
269-
Some(RcDoc::text(self.to_string()))
273+
Some(RcDoc::as_string(self))
270274
}
271275
}
272276

@@ -306,7 +310,7 @@ impl Doc for ASTNode<Option<Add>> {
306310

307311
impl Doc for MultOp {
308312
fn to_doc(&self, _: &mut Context<'_>) -> Option<RcDoc<'_>> {
309-
Some(RcDoc::text(self.to_string()))
313+
Some(RcDoc::as_string(self))
310314
}
311315
}
312316

@@ -365,9 +369,9 @@ impl Doc for ASTNode<Option<Unary>> {
365369
.map(|i| {
366370
Some(add_comment(
367371
if matches!(op, NegOp::Bang(_)) {
368-
RcDoc::as_string("!")
372+
RcDoc::text("!")
369373
} else {
370-
RcDoc::as_string("-")
374+
RcDoc::text("-")
371375
},
372376
comment.get(i as usize)?.clone(),
373377
RcDoc::nil(),
@@ -412,7 +416,7 @@ impl Doc for ASTNode<Option<RecInit>> {
412416
key_doc
413417
.append(RcDoc::line_())
414418
.append(add_comment(
415-
RcDoc::as_string(":"),
419+
RcDoc::text(":"),
416420
get_comment_after_end(e.0.info.0.end, &mut context.tokens)?,
417421
RcDoc::nil(),
418422
))
@@ -438,7 +442,7 @@ impl Doc for ASTNode<Option<Name>> {
438442
let (d, e) = pair?;
439443
Some((
440444
d.append(add_comment(
441-
RcDoc::as_string("::"),
445+
RcDoc::text("::"),
442446
get_comment_after_end(e.info.0.end, &mut context.tokens)?,
443447
RcDoc::nil(),
444448
))
@@ -449,7 +453,7 @@ impl Doc for ASTNode<Option<Name>> {
449453
)?
450454
.0
451455
.append(add_comment(
452-
RcDoc::as_string("::"),
456+
RcDoc::text("::"),
453457
get_comment_after_end(path.last()?.info.0.end, &mut context.tokens)?,
454458
RcDoc::nil(),
455459
))
@@ -462,6 +466,9 @@ impl Doc for ASTNode<Option<Name>> {
462466
impl Doc for ASTNode<Option<Str>> {
463467
fn to_doc(&self, context: &mut Context<'_>) -> Option<RcDoc<'_>> {
464468
let e = self.as_inner()?;
469+
// Note: the input string may contain newlines, but `utils::create_multiline_doc`
470+
// _cannot_ be used here because this function will change indentation
471+
// on newlines, which may alter the string content.
465472
Some(add_comment(
466473
RcDoc::as_string(e),
467474
get_comment_at_start(self.info.0.start, &mut context.tokens)?,
@@ -542,7 +549,7 @@ impl Doc for ASTNode<Option<Primary>> {
542549
let (d, e) = pair?;
543550
Some((
544551
d.append(add_comment(
545-
RcDoc::as_string(","),
552+
RcDoc::text(","),
546553
get_comment_after_end(e.info.0.end, &mut context.tokens)?,
547554
RcDoc::nil(),
548555
))
@@ -577,7 +584,7 @@ impl Doc for ASTNode<Option<Primary>> {
577584
let (d, e) = pair?;
578585
Some((
579586
d.append(add_comment(
580-
RcDoc::as_string(","),
587+
RcDoc::text(","),
581588
get_comment_after_end(e.info.0.end, &mut context.tokens)?,
582589
RcDoc::nil(),
583590
))
@@ -635,7 +642,7 @@ impl Doc for ASTNode<Option<MemAccess>> {
635642
let (d, e) = pair?;
636643
Some((
637644
d.append(add_comment(
638-
RcDoc::as_string(","),
645+
RcDoc::text(","),
639646
get_comment_after_end(e.info.0.end, &mut context.tokens)?,
640647
RcDoc::nil(),
641648
))

cedar-policy-formatter/src/pprint/fmt.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ pub fn policies_str_to_pretty(ps: &str, config: &Config) -> Result<String> {
117117
.ok_or(miette!("fail to get input policy CST"))?
118118
.0
119119
.iter()
120-
.map(|p| Ok(remove_empty_lines(tree_to_pretty(p, &mut context)?.trim())))
120+
.map(|p| Ok(remove_empty_lines(&tree_to_pretty(p, &mut context)?)))
121121
.collect::<Result<Vec<String>>>()?
122122
.join("\n\n");
123123
// handle comment at the end of a policyset

cedar-policy-formatter/src/pprint/utils.rs

Lines changed: 96 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
use itertools::Itertools;
1818
use pretty::RcDoc;
19+
use regex::Regex;
1920

2021
use super::token::{Comment, WrappedToken};
2122

@@ -24,24 +25,37 @@ pub fn add_brackets<'a>(d: RcDoc<'a>, leftp: RcDoc<'a>, rightp: RcDoc<'a>) -> Rc
2425
leftp.append(d.nest(1)).append(rightp)
2526
}
2627

28+
/// Convert a leading comment to an `RcDoc`, adding leading and trailing newlines.
2729
pub fn get_leading_comment_doc_from_str<'a>(leading_comment: &str) -> RcDoc<'a> {
2830
if leading_comment.is_empty() {
2931
RcDoc::nil()
3032
} else {
31-
let cs: RcDoc<'_> = RcDoc::intersperse(
32-
leading_comment
33-
.trim()
34-
.split('\n')
35-
.map(|c| RcDoc::text(c.to_owned())),
36-
RcDoc::hardline(),
37-
);
38-
RcDoc::hardline().append(cs).append(RcDoc::hardline())
33+
RcDoc::hardline()
34+
.append(create_multiline_doc(leading_comment))
35+
.append(RcDoc::hardline())
3936
}
4037
}
4138

42-
pub fn get_trailing_comment_doc_from_str<'a>(trailing_comment: &str) -> RcDoc<'a> {
39+
/// Convert multiline text into an `RcDoc`. Both `RcDoc::as_string` and
40+
/// `RcDoc::text` allow newlines in the text (although the official
41+
/// documentation says they don't), but the resulting text will maintain its
42+
/// original indentation instead of the new "pretty" indentation.
43+
fn create_multiline_doc<'a>(str: &str) -> RcDoc<'a> {
44+
RcDoc::intersperse(
45+
str.trim().split('\n').map(|c| RcDoc::text(c.to_owned())),
46+
RcDoc::hardline(),
47+
)
48+
}
49+
50+
/// Convert a trailing comment to an `RcDoc`, adding a trailing newline.
51+
/// There is no need to use `create_multiline_doc` because a trailing comment
52+
/// cannot contain newlines.
53+
pub fn get_trailing_comment_doc_from_str<'a>(
54+
trailing_comment: &str,
55+
next_doc: RcDoc<'a>,
56+
) -> RcDoc<'a> {
4357
if trailing_comment.is_empty() {
44-
RcDoc::nil()
58+
next_doc
4559
} else {
4660
RcDoc::space()
4761
.append(RcDoc::text(trailing_comment.trim().to_owned()))
@@ -86,26 +100,83 @@ pub fn get_comment_in_range(start: usize, end: usize, tokens: &mut [WrappedToken
86100
.collect()
87101
}
88102

89-
// Wrap doc with comment
103+
/// Wrap an `RcDoc` with comments. If there is a leading comment, then this
104+
/// will introduce a newline bat the start of the `RcDoc`. If there is a
105+
/// trailing comment, then it will introduce a newline at the end.
90106
pub fn add_comment<'a>(d: RcDoc<'a>, comment: Comment, next_doc: RcDoc<'a>) -> RcDoc<'a> {
91107
let leading_comment = comment.leading_comment;
92108
let trailing_comment = comment.trailing_comment;
93109
let leading_comment_doc = get_leading_comment_doc_from_str(&leading_comment);
94-
let trailing_comment_doc: RcDoc<'_> = if trailing_comment.is_empty() {
95-
d.append(next_doc)
96-
} else {
97-
d.append(RcDoc::space())
98-
.append(RcDoc::text(trailing_comment.trim().to_owned()))
99-
.append(RcDoc::hardline())
100-
};
110+
let trailing_comment_doc = get_trailing_comment_doc_from_str(&trailing_comment, next_doc);
111+
leading_comment_doc.append(d).append(trailing_comment_doc)
112+
}
101113

102-
leading_comment_doc.append(trailing_comment_doc.clone())
114+
/// Remove empty lines from the input string, ignoring the first and last lines.
115+
/// (Because of how this function is used in `remove_empty_lines`, the first and
116+
/// last lines may include important spacing information.) This will remove empty
117+
/// lines _everywhere_, including in places where that may not be desired
118+
/// (e.g., in string literals).
119+
fn remove_empty_interior_lines(s: &str) -> String {
120+
let mut new_s = String::new();
121+
if s.starts_with('\n') {
122+
new_s.push_str("\n");
123+
}
124+
new_s.push_str(
125+
s.split_inclusive('\n')
126+
// in the case where `s` does not end in a newline, `!ss.contains('\n')`
127+
// preserves whitespace on the last line
128+
.filter(|ss| !ss.trim().is_empty() || !ss.contains('\n'))
129+
.collect::<Vec<_>>()
130+
.join("")
131+
.as_str(),
132+
);
133+
new_s
103134
}
104135

105-
pub fn remove_empty_lines(s: &str) -> String {
106-
s.lines()
107-
.filter(|ss| !ss.trim().is_empty())
108-
.map(|s| s.to_owned())
109-
.collect::<Vec<String>>()
110-
.join("\n")
136+
/// Remove empty lines, safely handling newlines that occur in quotations.
137+
pub fn remove_empty_lines(text: &str) -> String {
138+
// PANIC SAFETY: this regex pattern is valid
139+
#[allow(clippy::unwrap_used)]
140+
let comment_regex = Regex::new(r"//[^\n]*").unwrap();
141+
// PANIC SAFETY: this regex pattern is valid
142+
#[allow(clippy::unwrap_used)]
143+
let string_regex = Regex::new(r#""(\\.|[^"\\])*"[^\n]*"#).unwrap();
144+
145+
let mut index = 0;
146+
let mut final_text = String::new();
147+
148+
while index < text.len() {
149+
// Check for the next comment and string. The general strategy is to
150+
// call `remove_empty_interior_lines` on all the text _outside_ of
151+
// strings. Comments should be skipped to avoid interpreting a quote in
152+
// a comment as a string.
153+
let comment_match = comment_regex.find_at(text, index);
154+
let string_match = string_regex.find_at(text, index);
155+
match (comment_match, string_match) {
156+
(Some(m1), Some(m2)) => {
157+
// Handle the earlier match
158+
let m = std::cmp::min_by_key(m1, m2, |m| m.start());
159+
// PANIC SAFETY: Slicing `text` is safe since `index <= m.start()` and both are within the bounds of `text`.
160+
#[allow(clippy::indexing_slicing)]
161+
final_text.push_str(&remove_empty_interior_lines(&text[index..m.start()]));
162+
final_text.push_str(m.as_str());
163+
index = m.end();
164+
}
165+
(Some(m), None) | (None, Some(m)) => {
166+
// PANIC SAFETY: Slicing `text` is safe since `index <= m.start()` and both are within the bounds of `text`.
167+
#[allow(clippy::indexing_slicing)]
168+
final_text.push_str(&remove_empty_interior_lines(&text[index..m.start()]));
169+
final_text.push_str(m.as_str());
170+
index = m.end();
171+
}
172+
(None, None) => {
173+
// PANIC SAFETY: Slicing `text` is safe since `index` is within the bounds of `text`.
174+
#[allow(clippy::indexing_slicing)]
175+
final_text.push_str(&remove_empty_interior_lines(&text[index..]));
176+
break;
177+
}
178+
}
179+
}
180+
// Trim the final result to account for dangling newlines
181+
final_text.trim().to_string()
111182
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Test fix for #862 where blank lines in strings were removed.
2+
3+
// The output of the formatter should change string or eid content (including
4+
// removing blank lines) because this will change the policy's semantics. It is
5+
// ok to remove blank lines everywhere else.
6+
7+
permit(principal == User
8+
9+
::
10+
11+
"alice", action, resource
12+
13+
in Folder::"Name
14+
15+
16+
with a newline") when // trailing comment
17+
18+
{
19+
20+
context.foo == "string
21+
22+
with
23+
24+
newlines and other strange characters🐈👍\"
25+
26+
// even something that looks like a comment
27+
28+
"
29+
30+
// Quotes in comments "
31+
32+
// shouldn't matter "
33+
34+
};

0 commit comments

Comments
 (0)