projectfluent · stasm · Nov 8, 2018 · Nov 7, 2018 · Nov 7, 2018 · Nov 8, 2018
diff --git a/spec/fluent.ebnf b/spec/fluent.ebnf
@@ -1,6 +1,6 @@
 
 /* An FTL file defines a Resource consisting of Entries. */
-Resource            ::= (Entry | blank_block | junk_line)*
+Resource            ::= (Entry | blank_block | Junk)*
 
 /* Entries are the main building blocks of Fluent. They define translations and
  * contextual and semantic information about the translations. During the AST
@@ -18,7 +18,13 @@ Term                ::= "-" Identifier blank_inline? "=" blank_inline? Value Att
  * the AST construction. */
 CommentLine         ::= ("###" | "##" | "#") ("\u0020" /.*/)? line_end
 
-/* Adjacent junk_lines are joined into FTL.Junk during the AST construction. */
+/* Junk represents unparsed content.
+ *
+ * Junk is parsed line-by-line until a line is found which looks like it might
+ * be the beginning of a new message, term, or comment. Any whitespace
+ * following a broken Entry is also considered part of Junk.
+ */
+Junk                ::= junk_line (junk_line - "#" - "-" - [a-zA-Z])*
 junk_line           ::= /[^\n]*/ ("\u000A" | EOF)
 
 /* Attributes of Messages and Terms. */

diff --git a/syntax/abstract.mjs b/syntax/abstract.mjs
@@ -57,7 +57,6 @@ export function list_into(Type) {
                 always(new FTL.Resource(
                     entries
                         .reduce(join_adjacent(
-                            FTL.Junk,
                             FTL.Comment,
                             FTL.GroupComment,
                             FTL.ResourceComment), [])
@@ -155,9 +154,6 @@ function join_of_type(Type, ...elements) {
         case FTL.ResourceComment:
             return elements.reduce((a, b) =>
                 new Type(a.content + "\n" + b.content));
-        case FTL.Junk:
-            return elements.reduce((a, b) =>
-                new Type(a.content + b.content));
     }
 }
 

diff --git a/syntax/grammar.mjs b/syntax/grammar.mjs
@@ -16,7 +16,7 @@ let Resource = defer(() =>
         either(
             Entry,
             blank_block,
-            junk_line))
+            Junk))
     .chain(list_into(FTL.Resource)));
 
 /* ------------------------------------------------------------------------- */
@@ -84,16 +84,33 @@ let CommentLine = defer(() =>
     .map(keep_abstract)
     .chain(list_into(FTL.Comment)));
 
-/* ------------------------------------------------------------------------- */
-/* Adjacent junk_lines are joined into FTL.Junk during the AST construction. */
+/* -------------------------------------------------------------------------- */
+/* Junk represents unparsed content.
+ *
+ * Junk is parsed line-by-line until a line is found which looks like it might
+ * be the beginning of a new message, term, or comment. Any whitespace
+ * following a broken Entry is also considered part of Junk.
+ */
+let Junk = defer(() =>
+    sequence(
+        junk_line,
+        repeat(
+            and(
+                not(charset("a-zA-Z")),
+                not(string("-")),
+                not(string("#")),
+                junk_line)))
+    .map(flatten(1))
+    .map(join)
+    .chain(into(FTL.Junk)));
+
 let junk_line =
     sequence(
         regex(/[^\n]*/),
         either(
             string("\u000A"),
             eof()))
-    .map(join)
-    .chain(into(FTL.Junk));
+    .map(join);
 
 /* --------------------------------- */
 /* Attributes of Messages and Terms. */

diff --git a/test/fixtures/astral.json b/test/fixtures/astral.json
@@ -154,7 +154,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "err-😂 = Value\n"
+            "content": "err-😂 = Value\n\n"
         },
         {
             "type": "Comment",
@@ -163,7 +163,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "err-invalid-expression = { 😂 }\n"
+            "content": "err-invalid-expression = { 😂 }\n\n"
         },
         {
             "type": "Comment",

diff --git a/test/fixtures/call_expressions.json b/test/fixtures/call_expressions.json
@@ -70,7 +70,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "mixed-case-callee = {Function()}\n"
+            "content": "mixed-case-callee = {Function()}\n\n"
         },
         {
             "type": "Comment",
@@ -88,7 +88,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "variable-callee = {$variable()}\n"
+            "content": "variable-callee = {$variable()}\n\n"
         },
         {
             "type": "GroupComment",
@@ -323,7 +323,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "shuffled-args = {FUN(1, x: 1, \"a\", y: \"Y\", msg)}\n"
+            "content": "shuffled-args = {FUN(1, x: 1, \"a\", y: \"Y\", msg)}\n\n"
         },
         {
             "type": "Comment",
@@ -332,7 +332,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "duplicate-named-args = {FUN(x: 1, x: \"X\")}\n"
+            "content": "duplicate-named-args = {FUN(x: 1, x: \"X\")}\n\n\n"
         },
         {
             "type": "GroupComment",
@@ -1063,7 +1063,17 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "one-argument = {FUN(1,,)}\nmissing-arg = {FUN(,)}\nmissing-sparse-arg = {FUN(   ,   )}\n"
+            "content": "one-argument = {FUN(1,,)}\n"
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "missing-arg = {FUN(,)}\n"
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "missing-sparse-arg = {FUN(   ,   )}\n\n\n"
         },
         {
             "type": "GroupComment",

diff --git a/test/fixtures/crlf.json b/test/fixtures/crlf.json
@@ -61,7 +61,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "err03 = { \"str\r\n"
+            "content": "err03 = { \"str\r\n\r\n"
         },
         {
             "type": "Comment",

diff --git a/test/fixtures/escaped_characters.json b/test/fixtures/escaped_characters.json
@@ -169,7 +169,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "unknown-escape = {\"\\x\"}\n"
+            "content": "unknown-escape = {\"\\x\"}\n\n"
         },
         {
             "type": "GroupComment",

diff --git a/test/fixtures/junk.ftl b/test/fixtures/junk.ftl
@@ -1,4 +1,21 @@
+## Two adjacent Junks.
+err01 = {1x}
+err02 = {2x}
+
+# A single Junk.
+err03 = {1x
+2
+
+# A single Junk.
 ą=Invalid identifier
 ć=Another one
 
-key01 = {
+# The COMMENT ends this junk.
+err04 = {
+# COMMENT
+
+# The COMMENT ends this junk.
+# The closing brace is a separate Junk.
+err04 = {
+# COMMENT
+}
diff --git a/test/fixtures/junk.json b/test/fixtures/junk.json
@@ -1,15 +1,68 @@
 {
     "type": "Resource",
     "body": [
+        {
+            "type": "GroupComment",
+            "content": "Two adjacent Junks."
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "err01 = {1x}\n"
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "err02 = {2x}\n\n"
+        },
+        {
+            "type": "Comment",
+            "content": "A single Junk."
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "err03 = {1x\n2\n\n"
+        },
+        {
+            "type": "Comment",
+            "content": "A single Junk."
+        },
         {
             "type": "Junk",
             "annotations": [],
-            "content": "ą=Invalid identifier\nć=Another one\n"
+            "content": "ą=Invalid identifier\nć=Another one\n\n"
+        },
+        {
+            "type": "Comment",
+            "content": "The COMMENT ends this junk."
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "err04 = {\n"
+        },
+        {
+            "type": "Comment",
+            "content": "COMMENT"
+        },
+        {
+            "type": "Comment",
+            "content": "The COMMENT ends this junk.\nThe closing brace is a separate Junk."
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "err04 = {\n"
+        },
+        {
+            "type": "Comment",
+            "content": "COMMENT"
         },
         {
             "type": "Junk",
             "annotations": [],
-            "content": "key01 = {\n"
+            "content": "}\n"
         }
     ]
 }
diff --git a/test/fixtures/leading_dots.json b/test/fixtures/leading_dots.json
@@ -173,7 +173,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "    .Continued\n"
+            "content": "    .Continued\n\n"
         },
         {
             "type": "Comment",
@@ -182,7 +182,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "key08 =\n    .Value\n"
+            "content": "key08 =\n    .Value\n\n"
         },
         {
             "type": "Comment",
@@ -191,7 +191,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "key09 =\n    .Value\n    Continued\n"
+            "content": "key09 =\n    .Value\n    Continued\n\n"
         },
         {
             "type": "Message",
@@ -410,7 +410,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "key16 =\n    { 1 ->\n       *[one]\n           .Value\n    }\n"
+            "content": "key16 =\n    { 1 ->\n       *[one]\n           .Value\n    }\n\n"
         },
         {
             "type": "Comment",
@@ -419,7 +419,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "key17 =\n    { 1 ->\n       *[one] Value\n           .Continued\n    }\n"
+            "content": "key17 =\n    { 1 ->\n       *[one] Value\n           .Continued\n    }\n\n"
         },
         {
             "type": "Comment",
@@ -428,7 +428,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "key18 =\n.Value\n"
+            "content": "key18 =\n.Value\n\n"
         },
         {
             "type": "Message",

diff --git a/test/fixtures/member_expressions.json b/test/fixtures/member_expressions.json
@@ -70,7 +70,12 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "variant-expression = {msg[case]}\nattribute-expression = {-term.attr}\n"
+            "content": "variant-expression = {msg[case]}\n"
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "attribute-expression = {-term.attr}\n"
         }
     ]
 }
diff --git a/test/fixtures/messages.json b/test/fixtures/messages.json
@@ -234,7 +234,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "key07 =\n"
+            "content": "key07 =\n\n"
         },
         {
             "type": "Comment",

diff --git a/test/fixtures/mixed_entries.json b/test/fixtures/mixed_entries.json
@@ -61,7 +61,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "ą=Invalid identifier\nć=Another one\n"
+            "content": "ą=Invalid identifier\nć=Another one\n\n"
         },
         {
             "type": "Message",
@@ -91,7 +91,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "    .attr = Dangling attribute\n"
+            "content": "    .attr = Dangling attribute\n\n"
         },
         {
             "type": "Message",

diff --git a/test/fixtures/placeables.json b/test/fixtures/placeables.json
@@ -80,7 +80,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "unmatched-open1 = { 1\n"
+            "content": "unmatched-open1 = { 1\n\n"
         },
         {
             "type": "Comment",
@@ -89,7 +89,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "unmatched-open2 = {{ 1 }\n"
+            "content": "unmatched-open2 = {{ 1 }\n\n"
         },
         {
             "type": "Comment",
@@ -98,7 +98,7 @@
         {
             "type": "Junk",
             "annotations": [],
-            "content": "unmatched-close1 = 1 }\n"
+            "content": "unmatched-close1 = 1 }\n\n"
         },
         {
             "type": "Comment",