Skip to content

Commit 44e01df

Browse files
khietashaobo-he-aws
authored andcommitted
Fix formatter handling of blank lines (#870)
1 parent a0f98d4 commit 44e01df

File tree

6 files changed

+199
-51
lines changed

6 files changed

+199
-51
lines changed

cedar-policy-formatter/src/pprint/doc.rs

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ impl Doc for Node<Option<VariableDef>> {
7474
var_doc
7575
.append(get_trailing_comment_doc_from_str(
7676
&start_comment.trailing_comment,
77+
RcDoc::nil(),
7778
))
7879
.append(is_doc)
7980
.append(RcDoc::line())
@@ -120,21 +121,23 @@ impl Doc for Node<Option<Cond>> {
120121
cond_doc
121122
.append(get_trailing_comment_doc_from_str(
122123
&cond_comment.trailing_comment,
124+
RcDoc::line(),
123125
))
124-
.append(RcDoc::line())
125126
.append(
126127
get_leading_comment_doc_from_str(&lb_comment.leading_comment).append(
127128
RcDoc::text("{").append(
128-
get_trailing_comment_doc_from_str(&lb_comment.trailing_comment)
129-
.append(RcDoc::line())
130-
.append(
131-
get_leading_comment_doc_from_str(&expr_leading_comment)
132-
.append(expr_doc.group()),
133-
)
134-
.nest(context.config.indent_width)
135-
.append(RcDoc::line())
136-
.append(rb_doc)
137-
.group(),
129+
get_trailing_comment_doc_from_str(
130+
&lb_comment.trailing_comment,
131+
RcDoc::line(),
132+
)
133+
.append(
134+
get_leading_comment_doc_from_str(&expr_leading_comment)
135+
.append(expr_doc.group()),
136+
)
137+
.nest(context.config.indent_width)
138+
.append(RcDoc::line())
139+
.append(rb_doc)
140+
.group(),
138141
),
139142
),
140143
)
@@ -145,15 +148,15 @@ impl Doc for Node<Option<Cond>> {
145148
cond_doc
146149
.append(get_trailing_comment_doc_from_str(
147150
&cond_comment.trailing_comment,
151+
RcDoc::line(),
148152
))
149-
.append(RcDoc::line())
150153
.append(
151154
get_leading_comment_doc_from_str(&lb_comment.leading_comment).append(
152155
RcDoc::text("{")
153156
.append(get_trailing_comment_doc_from_str(
154157
&lb_comment.trailing_comment,
158+
RcDoc::line(),
155159
))
156-
.append(RcDoc::line())
157160
.append(rb_doc)
158161
.group(),
159162
),
@@ -169,12 +172,12 @@ impl Doc for Node<Option<Expr>> {
169172
match self.as_inner()?.expr.as_ref() {
170173
ExprData::If(c, t, e) => {
171174
fn pp_group<'n>(
172-
s: &str,
175+
s: &'n str,
173176
c: Comment,
174177
e: &'n Node<Option<Expr>>,
175178
context: &mut Context<'_>,
176179
) -> RcDoc<'n> {
177-
add_comment(RcDoc::as_string(s), c, RcDoc::nil()).append(
180+
add_comment(RcDoc::text(s), c, RcDoc::nil()).append(
178181
RcDoc::line()
179182
.append(e.to_doc(context))
180183
.nest(context.config.indent_width),
@@ -330,7 +333,7 @@ impl Doc for Node<Option<Relation>> {
330333

331334
impl Doc for AddOp {
332335
fn to_doc(&self, _: &mut Context<'_>) -> Option<RcDoc<'_>> {
333-
Some(RcDoc::text(self.to_string()))
336+
Some(RcDoc::as_string(self))
334337
}
335338
}
336339

@@ -366,7 +369,7 @@ impl Doc for Node<Option<Add>> {
366369

367370
impl Doc for MultOp {
368371
fn to_doc(&self, _: &mut Context<'_>) -> Option<RcDoc<'_>> {
369-
Some(RcDoc::text(self.to_string()))
372+
Some(RcDoc::as_string(self))
370373
}
371374
}
372375

@@ -420,9 +423,9 @@ impl Doc for Node<Option<Unary>> {
420423
.map(|i| {
421424
Some(add_comment(
422425
if matches!(op, NegOp::Bang(_)) {
423-
RcDoc::as_string("!")
426+
RcDoc::text("!")
424427
} else {
425-
RcDoc::as_string("-")
428+
RcDoc::text("-")
426429
},
427430
comment.get(i as usize)?.clone(),
428431
RcDoc::nil(),
@@ -467,7 +470,7 @@ impl Doc for Node<Option<RecInit>> {
467470
key_doc
468471
.append(RcDoc::line_())
469472
.append(add_comment(
470-
RcDoc::as_string(":"),
473+
RcDoc::text(":"),
471474
get_comment_after_end(e.0.loc.span, &mut context.tokens)?,
472475
RcDoc::nil(),
473476
))
@@ -493,7 +496,7 @@ impl Doc for Node<Option<Name>> {
493496
let (d, e) = pair;
494497
Some((
495498
d.append(add_comment(
496-
RcDoc::as_string("::"),
499+
RcDoc::text("::"),
497500
get_comment_after_end(e.loc.span, &mut context.tokens)?,
498501
RcDoc::nil(),
499502
))
@@ -504,7 +507,7 @@ impl Doc for Node<Option<Name>> {
504507
)?
505508
.0
506509
.append(add_comment(
507-
RcDoc::as_string("::"),
510+
RcDoc::text("::"),
508511
get_comment_after_end(path.last()?.loc.span, &mut context.tokens)?,
509512
RcDoc::nil(),
510513
))
@@ -517,6 +520,9 @@ impl Doc for Node<Option<Name>> {
517520
impl Doc for Node<Option<Str>> {
518521
fn to_doc(&self, context: &mut Context<'_>) -> Option<RcDoc<'_>> {
519522
let e = self.as_inner()?;
523+
// Note: the input string may contain newlines, but `utils::create_multiline_doc`
524+
// _cannot_ be used here because this function will change indentation
525+
// on newlines, which may alter the string content.
520526
Some(add_comment(
521527
RcDoc::as_string(e),
522528
get_comment_at_start(self.loc.span, &mut context.tokens)?,
@@ -595,7 +601,7 @@ impl Doc for Node<Option<Primary>> {
595601
let (d, e) = pair;
596602
Some((
597603
d.append(add_comment(
598-
RcDoc::as_string(","),
604+
RcDoc::text(","),
599605
get_comment_after_end(e.loc.span, &mut context.tokens)?,
600606
RcDoc::nil(),
601607
))
@@ -627,7 +633,7 @@ impl Doc for Node<Option<Primary>> {
627633
let (d, e) = pair;
628634
Some((
629635
d.append(add_comment(
630-
RcDoc::as_string(","),
636+
RcDoc::text(","),
631637
get_comment_after_end(e.loc.span, &mut context.tokens)?,
632638
RcDoc::nil(),
633639
))
@@ -684,7 +690,7 @@ impl Doc for Node<Option<MemAccess>> {
684690
let (d, e) = pair;
685691
Some((
686692
d.append(add_comment(
687-
RcDoc::as_string(","),
693+
RcDoc::text(","),
688694
get_comment_after_end(e.loc.span, &mut context.tokens)?,
689695
RcDoc::nil(),
690696
))

cedar-policy-formatter/src/pprint/fmt.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ pub fn policies_str_to_pretty(ps: &str, config: &Config) -> Result<String> {
120120
.ok_or(miette!("fail to get input policy CST"))?
121121
.0
122122
.iter()
123-
.map(|p| Ok(remove_empty_lines(tree_to_pretty(p, &mut context)?.trim())))
123+
.map(|p| Ok(remove_empty_lines(&tree_to_pretty(p, &mut context)?)))
124124
.collect::<Result<Vec<String>>>()?
125125
.join("\n\n");
126126
// handle comment at the end of a policyset

cedar-policy-formatter/src/pprint/utils.rs

Lines changed: 96 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
use itertools::Itertools;
1818
use pretty::RcDoc;
19+
use regex::Regex;
1920

2021
use super::token::{Comment, WrappedToken};
2122

@@ -24,24 +25,37 @@ pub fn add_brackets<'a>(d: RcDoc<'a>, leftp: RcDoc<'a>, rightp: RcDoc<'a>) -> Rc
2425
leftp.append(d.nest(1)).append(rightp)
2526
}
2627

28+
/// Convert a leading comment to an `RcDoc`, adding leading and trailing newlines.
2729
pub fn get_leading_comment_doc_from_str<'a>(leading_comment: &str) -> RcDoc<'a> {
2830
if leading_comment.is_empty() {
2931
RcDoc::nil()
3032
} else {
31-
let cs: RcDoc<'_> = RcDoc::intersperse(
32-
leading_comment
33-
.trim()
34-
.split('\n')
35-
.map(|c| RcDoc::text(c.to_owned())),
36-
RcDoc::hardline(),
37-
);
38-
RcDoc::hardline().append(cs).append(RcDoc::hardline())
33+
RcDoc::hardline()
34+
.append(create_multiline_doc(leading_comment))
35+
.append(RcDoc::hardline())
3936
}
4037
}
4138

42-
pub fn get_trailing_comment_doc_from_str<'a>(trailing_comment: &str) -> RcDoc<'a> {
39+
/// Convert multiline text into an `RcDoc`. Both `RcDoc::as_string` and
40+
/// `RcDoc::text` allow newlines in the text (although the official
41+
/// documentation says they don't), but the resulting text will maintain its
42+
/// original indentation instead of the new "pretty" indentation.
43+
fn create_multiline_doc<'a>(str: &str) -> RcDoc<'a> {
44+
RcDoc::intersperse(
45+
str.trim().split('\n').map(|c| RcDoc::text(c.to_owned())),
46+
RcDoc::hardline(),
47+
)
48+
}
49+
50+
/// Convert a trailing comment to an `RcDoc`, adding a trailing newline.
51+
/// There is no need to use `create_multiline_doc` because a trailing comment
52+
/// cannot contain newlines.
53+
pub fn get_trailing_comment_doc_from_str<'a>(
54+
trailing_comment: &str,
55+
next_doc: RcDoc<'a>,
56+
) -> RcDoc<'a> {
4357
if trailing_comment.is_empty() {
44-
RcDoc::nil()
58+
next_doc
4559
} else {
4660
RcDoc::space()
4761
.append(RcDoc::text(trailing_comment.trim().to_owned()))
@@ -112,26 +126,83 @@ pub fn get_comment_in_range(span: miette::SourceSpan, tokens: &mut [WrappedToken
112126
.collect()
113127
}
114128

115-
// Wrap doc with comment
129+
/// Wrap an `RcDoc` with comments. If there is a leading comment, then this
130+
/// will introduce a newline bat the start of the `RcDoc`. If there is a
131+
/// trailing comment, then it will introduce a newline at the end.
116132
pub fn add_comment<'a>(d: RcDoc<'a>, comment: Comment, next_doc: RcDoc<'a>) -> RcDoc<'a> {
117133
let leading_comment = comment.leading_comment;
118134
let trailing_comment = comment.trailing_comment;
119135
let leading_comment_doc = get_leading_comment_doc_from_str(&leading_comment);
120-
let trailing_comment_doc: RcDoc<'_> = if trailing_comment.is_empty() {
121-
d.append(next_doc)
122-
} else {
123-
d.append(RcDoc::space())
124-
.append(RcDoc::text(trailing_comment.trim().to_owned()))
125-
.append(RcDoc::hardline())
126-
};
136+
let trailing_comment_doc = get_trailing_comment_doc_from_str(&trailing_comment, next_doc);
137+
leading_comment_doc.append(d).append(trailing_comment_doc)
138+
}
127139

128-
leading_comment_doc.append(trailing_comment_doc.clone())
140+
/// Remove empty lines from the input string, ignoring the first and last lines.
141+
/// (Because of how this function is used in `remove_empty_lines`, the first and
142+
/// last lines may include important spacing information.) This will remove empty
143+
/// lines _everywhere_, including in places where that may not be desired
144+
/// (e.g., in string literals).
145+
fn remove_empty_interior_lines(s: &str) -> String {
146+
let mut new_s = String::new();
147+
if s.starts_with('\n') {
148+
new_s.push_str("\n");
149+
}
150+
new_s.push_str(
151+
s.split_inclusive('\n')
152+
// in the case where `s` does not end in a newline, `!ss.contains('\n')`
153+
// preserves whitespace on the last line
154+
.filter(|ss| !ss.trim().is_empty() || !ss.contains('\n'))
155+
.collect::<Vec<_>>()
156+
.join("")
157+
.as_str(),
158+
);
159+
new_s
129160
}
130161

131-
pub fn remove_empty_lines(s: &str) -> String {
132-
s.lines()
133-
.filter(|ss| !ss.trim().is_empty())
134-
.map(|s| s.to_owned())
135-
.collect::<Vec<String>>()
136-
.join("\n")
162+
/// Remove empty lines, safely handling newlines that occur in quotations.
163+
pub fn remove_empty_lines(text: &str) -> String {
164+
// PANIC SAFETY: this regex pattern is valid
165+
#[allow(clippy::unwrap_used)]
166+
let comment_regex = Regex::new(r"//[^\n]*").unwrap();
167+
// PANIC SAFETY: this regex pattern is valid
168+
#[allow(clippy::unwrap_used)]
169+
let string_regex = Regex::new(r#""(\\.|[^"\\])*"[^\n]*"#).unwrap();
170+
171+
let mut index = 0;
172+
let mut final_text = String::new();
173+
174+
while index < text.len() {
175+
// Check for the next comment and string. The general strategy is to
176+
// call `remove_empty_interior_lines` on all the text _outside_ of
177+
// strings. Comments should be skipped to avoid interpreting a quote in
178+
// a comment as a string.
179+
let comment_match = comment_regex.find_at(text, index);
180+
let string_match = string_regex.find_at(text, index);
181+
match (comment_match, string_match) {
182+
(Some(m1), Some(m2)) => {
183+
// Handle the earlier match
184+
let m = std::cmp::min_by_key(m1, m2, |m| m.start());
185+
// PANIC SAFETY: Slicing `text` is safe since `index <= m.start()` and both are within the bounds of `text`.
186+
#[allow(clippy::indexing_slicing)]
187+
final_text.push_str(&remove_empty_interior_lines(&text[index..m.start()]));
188+
final_text.push_str(m.as_str());
189+
index = m.end();
190+
}
191+
(Some(m), None) | (None, Some(m)) => {
192+
// PANIC SAFETY: Slicing `text` is safe since `index <= m.start()` and both are within the bounds of `text`.
193+
#[allow(clippy::indexing_slicing)]
194+
final_text.push_str(&remove_empty_interior_lines(&text[index..m.start()]));
195+
final_text.push_str(m.as_str());
196+
index = m.end();
197+
}
198+
(None, None) => {
199+
// PANIC SAFETY: Slicing `text` is safe since `index` is within the bounds of `text`.
200+
#[allow(clippy::indexing_slicing)]
201+
final_text.push_str(&remove_empty_interior_lines(&text[index..]));
202+
break;
203+
}
204+
}
205+
}
206+
// Trim the final result to account for dangling newlines
207+
final_text.trim().to_string()
137208
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Test fix for #862 where blank lines in strings were removed.
2+
3+
// The output of the formatter should change string or eid content (including
4+
// removing blank lines) because this will change the policy's semantics. It is
5+
// ok to remove blank lines everywhere else.
6+
7+
permit(principal == User
8+
9+
::
10+
11+
"alice", action, resource
12+
13+
in Folder::"Name
14+
15+
16+
with a newline") when // trailing comment
17+
18+
{
19+
20+
context.foo == "string
21+
22+
with
23+
24+
newlines and other strange characters🐈👍\"
25+
26+
// even something that looks like a comment
27+
28+
"
29+
30+
// Quotes in comments "
31+
32+
// shouldn't matter "
33+
34+
};

0 commit comments

Comments
 (0)