Skip to content

Commit 41763df

Browse files
authored
Fix formatter handling of blank lines (#870)
1 parent 127f904 commit 41763df

File tree

6 files changed

+197
-51
lines changed

6 files changed

+197
-51
lines changed

cedar-policy-formatter/src/pprint/doc.rs

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ impl Doc for Node<Option<VariableDef>> {
7272
var_doc
7373
.append(get_trailing_comment_doc_from_str(
7474
&start_comment.trailing_comment,
75+
RcDoc::nil(),
7576
))
7677
.append(is_doc)
7778
.append(RcDoc::line())
@@ -108,21 +109,23 @@ impl Doc for Node<Option<Cond>> {
108109
cond_doc
109110
.append(get_trailing_comment_doc_from_str(
110111
&cond_comment.trailing_comment,
112+
RcDoc::line(),
111113
))
112-
.append(RcDoc::line())
113114
.append(
114115
get_leading_comment_doc_from_str(&lb_comment.leading_comment).append(
115116
RcDoc::text("{").append(
116-
get_trailing_comment_doc_from_str(&lb_comment.trailing_comment)
117-
.append(RcDoc::line())
118-
.append(
119-
get_leading_comment_doc_from_str(&expr_leading_comment)
120-
.append(expr_doc.group()),
121-
)
122-
.nest(context.config.indent_width)
123-
.append(RcDoc::line())
124-
.append(rb_doc)
125-
.group(),
117+
get_trailing_comment_doc_from_str(
118+
&lb_comment.trailing_comment,
119+
RcDoc::line(),
120+
)
121+
.append(
122+
get_leading_comment_doc_from_str(&expr_leading_comment)
123+
.append(expr_doc.group()),
124+
)
125+
.nest(context.config.indent_width)
126+
.append(RcDoc::line())
127+
.append(rb_doc)
128+
.group(),
126129
),
127130
),
128131
)
@@ -133,15 +136,15 @@ impl Doc for Node<Option<Cond>> {
133136
cond_doc
134137
.append(get_trailing_comment_doc_from_str(
135138
&cond_comment.trailing_comment,
139+
RcDoc::line(),
136140
))
137-
.append(RcDoc::line())
138141
.append(
139142
get_leading_comment_doc_from_str(&lb_comment.leading_comment).append(
140143
RcDoc::text("{")
141144
.append(get_trailing_comment_doc_from_str(
142145
&lb_comment.trailing_comment,
146+
RcDoc::line(),
143147
))
144-
.append(RcDoc::line())
145148
.append(rb_doc)
146149
.group(),
147150
),
@@ -157,12 +160,12 @@ impl Doc for Node<Option<Expr>> {
157160
match self.as_inner()?.expr.as_ref() {
158161
ExprData::If(c, t, e) => {
159162
fn pp_group<'n>(
160-
s: &str,
163+
s: &'n str,
161164
c: Comment,
162165
e: &'n Node<Option<Expr>>,
163166
context: &mut Context<'_>,
164167
) -> RcDoc<'n> {
165-
add_comment(RcDoc::as_string(s), c, RcDoc::nil()).append(
168+
add_comment(RcDoc::text(s), c, RcDoc::nil()).append(
166169
RcDoc::line()
167170
.append(e.to_doc(context))
168171
.nest(context.config.indent_width),
@@ -318,7 +321,7 @@ impl Doc for Node<Option<Relation>> {
318321

319322
impl Doc for AddOp {
320323
fn to_doc(&self, _: &mut Context<'_>) -> Option<RcDoc<'_>> {
321-
Some(RcDoc::text(self.to_string()))
324+
Some(RcDoc::as_string(self))
322325
}
323326
}
324327

@@ -354,7 +357,7 @@ impl Doc for Node<Option<Add>> {
354357

355358
impl Doc for MultOp {
356359
fn to_doc(&self, _: &mut Context<'_>) -> Option<RcDoc<'_>> {
357-
Some(RcDoc::text(self.to_string()))
360+
Some(RcDoc::as_string(self))
358361
}
359362
}
360363

@@ -408,9 +411,9 @@ impl Doc for Node<Option<Unary>> {
408411
.map(|i| {
409412
Some(add_comment(
410413
if matches!(op, NegOp::Bang(_)) {
411-
RcDoc::as_string("!")
414+
RcDoc::text("!")
412415
} else {
413-
RcDoc::as_string("-")
416+
RcDoc::text("-")
414417
},
415418
comment.get(i as usize)?.clone(),
416419
RcDoc::nil(),
@@ -455,7 +458,7 @@ impl Doc for Node<Option<RecInit>> {
455458
key_doc
456459
.append(RcDoc::line_())
457460
.append(add_comment(
458-
RcDoc::as_string(":"),
461+
RcDoc::text(":"),
459462
get_comment_after_end(e.0.loc.span, &mut context.tokens)?,
460463
RcDoc::nil(),
461464
))
@@ -481,7 +484,7 @@ impl Doc for Node<Option<Name>> {
481484
let (d, e) = pair;
482485
Some((
483486
d.append(add_comment(
484-
RcDoc::as_string("::"),
487+
RcDoc::text("::"),
485488
get_comment_after_end(e.loc.span, &mut context.tokens)?,
486489
RcDoc::nil(),
487490
))
@@ -492,7 +495,7 @@ impl Doc for Node<Option<Name>> {
492495
)?
493496
.0
494497
.append(add_comment(
495-
RcDoc::as_string("::"),
498+
RcDoc::text("::"),
496499
get_comment_after_end(path.last()?.loc.span, &mut context.tokens)?,
497500
RcDoc::nil(),
498501
))
@@ -505,6 +508,9 @@ impl Doc for Node<Option<Name>> {
505508
impl Doc for Node<Option<Str>> {
506509
fn to_doc(&self, context: &mut Context<'_>) -> Option<RcDoc<'_>> {
507510
let e = self.as_inner()?;
511+
// Note: the input string may contain newlines, but `utils::create_multiline_doc`
512+
// _cannot_ be used here because this function will change indentation
513+
// on newlines, which may alter the string content.
508514
Some(add_comment(
509515
RcDoc::as_string(e),
510516
get_comment_at_start(self.loc.span, &mut context.tokens)?,
@@ -583,7 +589,7 @@ impl Doc for Node<Option<Primary>> {
583589
let (d, e) = pair;
584590
Some((
585591
d.append(add_comment(
586-
RcDoc::as_string(","),
592+
RcDoc::text(","),
587593
get_comment_after_end(e.loc.span, &mut context.tokens)?,
588594
RcDoc::nil(),
589595
))
@@ -615,7 +621,7 @@ impl Doc for Node<Option<Primary>> {
615621
let (d, e) = pair;
616622
Some((
617623
d.append(add_comment(
618-
RcDoc::as_string(","),
624+
RcDoc::text(","),
619625
get_comment_after_end(e.loc.span, &mut context.tokens)?,
620626
RcDoc::nil(),
621627
))
@@ -672,7 +678,7 @@ impl Doc for Node<Option<MemAccess>> {
672678
let (d, e) = pair;
673679
Some((
674680
d.append(add_comment(
675-
RcDoc::as_string(","),
681+
RcDoc::text(","),
676682
get_comment_after_end(e.loc.span, &mut context.tokens)?,
677683
RcDoc::nil(),
678684
))

cedar-policy-formatter/src/pprint/fmt.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ pub fn policies_str_to_pretty(ps: &str, config: &Config) -> Result<String> {
120120
.ok_or(miette!("fail to get input policy CST"))?
121121
.0
122122
.iter()
123-
.map(|p| Ok(remove_empty_lines(tree_to_pretty(p, &mut context)?.trim())))
123+
.map(|p| Ok(remove_empty_lines(&tree_to_pretty(p, &mut context)?)))
124124
.collect::<Result<Vec<String>>>()?
125125
.join("\n\n");
126126
// handle comment at the end of a policyset

cedar-policy-formatter/src/pprint/utils.rs

Lines changed: 96 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
use itertools::Itertools;
1818
use pretty::RcDoc;
19+
use regex::Regex;
1920

2021
use super::token::{Comment, WrappedToken};
2122

@@ -24,24 +25,37 @@ pub fn add_brackets<'a>(d: RcDoc<'a>, leftp: RcDoc<'a>, rightp: RcDoc<'a>) -> Rc
2425
leftp.append(d.nest(1)).append(rightp)
2526
}
2627

28+
/// Convert a leading comment to an `RcDoc`, adding leading and trailing newlines.
2729
pub fn get_leading_comment_doc_from_str<'a>(leading_comment: &str) -> RcDoc<'a> {
2830
if leading_comment.is_empty() {
2931
RcDoc::nil()
3032
} else {
31-
let cs: RcDoc<'_> = RcDoc::intersperse(
32-
leading_comment
33-
.trim()
34-
.split('\n')
35-
.map(|c| RcDoc::text(c.to_owned())),
36-
RcDoc::hardline(),
37-
);
38-
RcDoc::hardline().append(cs).append(RcDoc::hardline())
33+
RcDoc::hardline()
34+
.append(create_multiline_doc(leading_comment))
35+
.append(RcDoc::hardline())
3936
}
4037
}
4138

42-
pub fn get_trailing_comment_doc_from_str<'a>(trailing_comment: &str) -> RcDoc<'a> {
39+
/// Convert multiline text into an `RcDoc`. Both `RcDoc::as_string` and
40+
/// `RcDoc::text` allow newlines in the text (although the official
41+
/// documentation says they don't), but the resulting text will maintain its
42+
/// original indentation instead of the new "pretty" indentation.
43+
fn create_multiline_doc<'a>(str: &str) -> RcDoc<'a> {
44+
RcDoc::intersperse(
45+
str.trim().split('\n').map(|c| RcDoc::text(c.to_owned())),
46+
RcDoc::hardline(),
47+
)
48+
}
49+
50+
/// Convert a trailing comment to an `RcDoc`, adding a trailing newline.
51+
/// There is no need to use `create_multiline_doc` because a trailing comment
52+
/// cannot contain newlines.
53+
pub fn get_trailing_comment_doc_from_str<'a>(
54+
trailing_comment: &str,
55+
next_doc: RcDoc<'a>,
56+
) -> RcDoc<'a> {
4357
if trailing_comment.is_empty() {
44-
RcDoc::nil()
58+
next_doc
4559
} else {
4660
RcDoc::space()
4761
.append(RcDoc::text(trailing_comment.trim().to_owned()))
@@ -112,26 +126,83 @@ pub fn get_comment_in_range(span: miette::SourceSpan, tokens: &mut [WrappedToken
112126
.collect()
113127
}
114128

115-
// Wrap doc with comment
129+
/// Wrap an `RcDoc` with comments. If there is a leading comment, then this
130+
/// will introduce a newline bat the start of the `RcDoc`. If there is a
131+
/// trailing comment, then it will introduce a newline at the end.
116132
pub fn add_comment<'a>(d: RcDoc<'a>, comment: Comment, next_doc: RcDoc<'a>) -> RcDoc<'a> {
117133
let leading_comment = comment.leading_comment;
118134
let trailing_comment = comment.trailing_comment;
119135
let leading_comment_doc = get_leading_comment_doc_from_str(&leading_comment);
120-
let trailing_comment_doc: RcDoc<'_> = if trailing_comment.is_empty() {
121-
d.append(next_doc)
122-
} else {
123-
d.append(RcDoc::space())
124-
.append(RcDoc::text(trailing_comment.trim().to_owned()))
125-
.append(RcDoc::hardline())
126-
};
136+
let trailing_comment_doc = get_trailing_comment_doc_from_str(&trailing_comment, next_doc);
137+
leading_comment_doc.append(d).append(trailing_comment_doc)
138+
}
127139

128-
leading_comment_doc.append(trailing_comment_doc.clone())
140+
/// Remove empty lines from the input string, ignoring the first and last lines.
141+
/// (Because of how this function is used in `remove_empty_lines`, the first and
142+
/// last lines may include important spacing information.) This will remove empty
143+
/// lines _everywhere_, including in places where that may not be desired
144+
/// (e.g., in string literals).
145+
fn remove_empty_interior_lines(s: &str) -> String {
146+
let mut new_s = String::new();
147+
if s.starts_with('\n') {
148+
new_s.push_str("\n");
149+
}
150+
new_s.push_str(
151+
s.split_inclusive('\n')
152+
// in the case where `s` does not end in a newline, `!ss.contains('\n')`
153+
// preserves whitespace on the last line
154+
.filter(|ss| !ss.trim().is_empty() || !ss.contains('\n'))
155+
.collect::<Vec<_>>()
156+
.join("")
157+
.as_str(),
158+
);
159+
new_s
129160
}
130161

131-
pub fn remove_empty_lines(s: &str) -> String {
132-
s.lines()
133-
.filter(|ss| !ss.trim().is_empty())
134-
.map(|s| s.to_owned())
135-
.collect::<Vec<String>>()
136-
.join("\n")
162+
/// Remove empty lines, safely handling newlines that occur in quotations.
163+
pub fn remove_empty_lines(text: &str) -> String {
164+
// PANIC SAFETY: this regex pattern is valid
165+
#[allow(clippy::unwrap_used)]
166+
let comment_regex = Regex::new(r"//[^\n]*").unwrap();
167+
// PANIC SAFETY: this regex pattern is valid
168+
#[allow(clippy::unwrap_used)]
169+
let string_regex = Regex::new(r#""(\\.|[^"\\])*"[^\n]*"#).unwrap();
170+
171+
let mut index = 0;
172+
let mut final_text = String::new();
173+
174+
while index < text.len() {
175+
// Check for the next comment and string. The general strategy is to
176+
// call `remove_empty_interior_lines` on all the text _outside_ of
177+
// strings. Comments should be skipped to avoid interpreting a quote in
178+
// a comment as a string.
179+
let comment_match = comment_regex.find_at(text, index);
180+
let string_match = string_regex.find_at(text, index);
181+
match (comment_match, string_match) {
182+
(Some(m1), Some(m2)) => {
183+
// Handle the earlier match
184+
let m = std::cmp::min_by_key(m1, m2, |m| m.start());
185+
// PANIC SAFETY: Slicing `text` is safe since `index <= m.start()` and both are within the bounds of `text`.
186+
#[allow(clippy::indexing_slicing)]
187+
final_text.push_str(&remove_empty_interior_lines(&text[index..m.start()]));
188+
final_text.push_str(m.as_str());
189+
index = m.end();
190+
}
191+
(Some(m), None) | (None, Some(m)) => {
192+
// PANIC SAFETY: Slicing `text` is safe since `index <= m.start()` and both are within the bounds of `text`.
193+
#[allow(clippy::indexing_slicing)]
194+
final_text.push_str(&remove_empty_interior_lines(&text[index..m.start()]));
195+
final_text.push_str(m.as_str());
196+
index = m.end();
197+
}
198+
(None, None) => {
199+
// PANIC SAFETY: Slicing `text` is safe since `index` is within the bounds of `text`.
200+
#[allow(clippy::indexing_slicing)]
201+
final_text.push_str(&remove_empty_interior_lines(&text[index..]));
202+
break;
203+
}
204+
}
205+
}
206+
// Trim the final result to account for dangling newlines
207+
final_text.trim().to_string()
137208
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Test fix for #862 where blank lines in strings were removed.
2+
3+
// The output of the formatter should change string or eid content (including
4+
// removing blank lines) because this will change the policy's semantics. It is
5+
// ok to remove blank lines everywhere else.
6+
7+
permit(principal == User
8+
9+
::
10+
11+
"alice", action, resource
12+
13+
in Folder::"Name
14+
15+
16+
with a newline") when // trailing comment
17+
18+
{
19+
20+
context.foo == "string
21+
22+
with
23+
24+
newlines and other strange characters🐈👍\"
25+
26+
// even something that looks like a comment
27+
28+
"
29+
30+
// Quotes in comments "
31+
32+
// shouldn't matter "
33+
34+
};

0 commit comments

Comments
 (0)