Skip to content

Commit 8e790b3

Browse files
committed
[#176] Haskell: Store node position in the AST
1 parent e7e68ba commit 8e790b3

File tree

1 file changed

+106
-51
lines changed

1 file changed

+106
-51
lines changed

source/src/BNFC/Backend/Haskell/CFtoHappy.hs

Lines changed: 106 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,12 @@ cf2Happy
4444
-> String -- ^ Generated code.
4545
cf2Happy name absName lexName mode tokenText functor cf = unlines
4646
[ header name absName lexName tokenText
47-
, render $ declarations mode (allEntryPoints cf)
48-
, render $ tokens cf
47+
, render $ declarations mode functor (allEntryPoints cf)
48+
, render $ tokens cf functor
4949
, delimiter
50-
, specialRules absName tokenText cf
50+
, specialRules absName functor tokenText cf
5151
, render $ prRules absName functor (rulesForHappy absName functor cf)
52-
, footer
52+
, footer functor cf
5353
]
5454

5555
-- | Construct the header.
@@ -68,15 +68,23 @@ header modName absName lexName tokenText = unlines $ concat
6868
]
6969

7070
-- | The declarations of a happy file.
71-
-- >>> declarations Standard [Cat "A", Cat "B", ListCat (Cat "B")]
71+
-- >>> declarations Standard False [Cat "A", Cat "B", ListCat (Cat "B")]
7272
-- %name pA A
7373
-- %name pB B
7474
-- %name pListB ListB
7575
-- -- no lexer declaration
7676
-- %monad { Either String } { (>>=) } { return }
7777
-- %tokentype {Token}
78-
declarations :: HappyMode -> [Cat] -> Doc
79-
declarations mode ns = vcat
78+
--
79+
-- >>> declarations Standard True [Cat "A", Cat "B", ListCat (Cat "B")]
80+
-- %name pA_internal A
81+
-- %name pB_internal B
82+
-- %name pListB_internal ListB
83+
-- -- no lexer declaration
84+
-- %monad { Either String } { (>>=) } { return }
85+
-- %tokentype {Token}
86+
declarations :: HappyMode -> Bool -> [Cat] -> Doc
87+
declarations mode functor ns = vcat
8088
[ vcat $ map generateP ns
8189
, case mode of
8290
Standard -> "-- no lexer declaration"
@@ -85,21 +93,21 @@ declarations mode ns = vcat
8593
"%tokentype" <+> braces (text tokenName)
8694
]
8795
where
88-
generateP n = "%name" <+> parserName n <+> text (identCat n)
96+
generateP n = "%name" <+> parserName n <> (if functor then "_internal" else "") <+> text (identCat n)
8997

9098
-- The useless delimiter symbol.
9199
delimiter :: String
92100
delimiter = "\n%%\n"
93101

94102
-- | Generate the list of tokens and their identifiers.
95-
tokens :: CF -> Doc
96-
tokens cf
103+
tokens :: CF -> Bool -> Doc
104+
tokens cf functor
97105
-- Andreas, 2019-01-02: "%token" followed by nothing is a Happy parse error.
98106
-- Thus, if we have no tokens, do not output anything.
99107
| null ts = empty
100108
| otherwise = "%token" $$ (nest 2 $ vcat ts)
101109
where
102-
ts = map prToken (cfTokens cf) ++ map text (specialToks cf)
110+
ts = map prToken (cfTokens cf) ++ map text (specialToks cf functor)
103111
prToken (t,k) = hsep [ convert t, lbrace, text ("PT _ (TS _ " ++ show k ++ ")"), rbrace ]
104112

105113
-- Happy doesn't allow characters such as åäö to occur in the happy file. This
@@ -117,10 +125,10 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) ->
117125
-- >>> constructRule "Foo" False (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable)
118126
-- ("Exp '+' Exp","Foo.EPlus $1 $3")
119127
--
120-
-- If we're using functors, it adds void value:
128+
-- If we're using functors, it adds position value:
121129
--
122130
-- >>> constructRule "Foo" True (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable)
123-
-- ("Exp '+' Exp","Foo.EPlus () $1 $3")
131+
-- ("Exp '+' Exp","(fst $1, Foo.EPlus (fst $1) (snd $1) (snd $3))")
124132
--
125133
-- List constructors should not be prefixed by the abstract module name:
126134
--
@@ -133,17 +141,31 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) ->
133141
-- Coercion are much simpler:
134142
--
135143
-- >>> constructRule "Foo" True (npRule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"] Parsable)
136-
-- ("'(' Exp ')'","$2")
144+
-- ("'(' Exp ')'","(Just (tokenLineCol $1), (snd $2))")
137145
--
138146
constructRule :: IsFun f => String -> Bool -> Rul f -> (Pattern, Action)
139147
constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pattern, action)
140148
where
141149
fun = funName fun0
142-
(pattern, metavars) = generatePatterns rhs
143-
action | isCoercion fun = unwords metavars
144-
| isNilCons fun = unwords (qualify fun : metavars)
145-
| functor = unwords (qualify fun : "()" : metavars)
150+
(pattern, metavars) = generatePatterns rhs functor
151+
action | isCoercion fun && not functor = unwords metavars
152+
| isNilCons fun && not functor = unwords (qualify fun : metavars)
153+
| functor = getFunctorAction
146154
| otherwise = unwords (qualify fun : metavars)
155+
-- Commelina, 2020-12-10:
156+
-- replace the previous unimplemented "()"
157+
getFunctorPos rhs = case rhs of
158+
[] -> "Nothing"
159+
(Left _:_) -> "fst $1"
160+
(Right _:_) -> "Just (tokenLineCol $1)"
161+
getFunctorValue
162+
| isCoercion fun = unwords metavars
163+
| isNilCons fun = unwords (qualify fun : metavars)
164+
| otherwise = unwords (qualify fun : ("(" ++ getFunctorPos rhs ++ ")") : metavars)
165+
getFunctorAction =
166+
let pos = getFunctorPos rhs
167+
val = getFunctorValue
168+
in "(" ++ pos ++ ", " ++ val ++ ")"
147169
qualify f
148170
| isConsFun f || isNilCons f = f
149171
| isDefinedRule f = absName ++ "." ++ mkDefName f
@@ -154,14 +176,17 @@ constructRule _ _ (Rule _ _ _ Internal) = undefined -- impossible
154176
-- | Generate patterns and a set of metavariables (de Bruijn indices) indicating
155177
-- where in the pattern the non-terminal are locate.
156178
--
157-
-- >>> generatePatterns [ Left (Cat "Exp"), Right "+", Left (Cat "Exp") ]
179+
-- >>> generatePatterns [ Left (Cat "Exp"), Right "+", Left (Cat "Exp") ] False
158180
-- ("Exp '+' Exp",["$1","$3"])
159181
--
160-
generatePatterns :: SentForm -> (Pattern, [MetaVar])
161-
generatePatterns [] = ("{- empty -}", [])
162-
generatePatterns its =
182+
-- >>> generatePatterns [ Left (Cat "Exp"), Right "+", Left (Cat "Exp") ] True
183+
-- ("Exp '+' Exp",["(snd $1)","(snd $3)"])
184+
--
185+
generatePatterns :: SentForm -> Bool -> (Pattern, [MetaVar])
186+
generatePatterns [] _ = ("{- empty -}", [])
187+
generatePatterns its functor =
163188
( unwords $ for its $ either {-non-term:-} identCat {-term:-} (render . convert)
164-
, [ ('$' : show i) | (i, Left{}) <- zip [1 :: Int ..] its ]
189+
, [ if functor then "(snd $" ++ show i ++ ")" else ('$' : show i) | (i, Left{}) <- zip [1 :: Int ..] its ]
165190
)
166191

167192
-- We have now constructed the patterns and actions,
@@ -186,35 +211,36 @@ generatePatterns its =
186211
--
187212
-- The functor case:
188213
-- >>> prRules "" True [(Cat "Expr", [("Integer", "EInt () $1"), ("Expr '+' Expr", "EPlus () $1 $3")])]
189-
-- Expr :: { (Expr ()) }
214+
-- Expr :: { (Maybe (Int, Int), (Expr (Maybe (Int, Int))) ) }
190215
-- Expr : Integer { EInt () $1 } | Expr '+' Expr { EPlus () $1 $3 }
191216
--
192217
-- A list with coercion: in the type signature we need to get rid of the
193218
-- coercion.
194219
--
195220
-- >>> prRules "" True [(ListCat (CoercCat "Exp" 2), [("Exp2", "(:[]) $1"), ("Exp2 ',' ListExp2","(:) $1 $3")])]
196-
-- ListExp2 :: { [Exp ()] }
221+
-- ListExp2 :: { (Maybe (Int, Int), [Exp (Maybe (Int, Int))] ) }
197222
-- ListExp2 : Exp2 { (:[]) $1 } | Exp2 ',' ListExp2 { (:) $1 $3 }
198223
--
199224
prRules :: ModuleName -> Bool -> Rules -> Doc
200225
prRules absM functor = vsep . map prOne
201226
where
202227
prOne (_ , [] ) = empty -- nt has only internal use
203228
prOne (nt, (p,a):ls) =
204-
hsep [ nt', "::", "{", type' nt, "}" ]
229+
hsep [ nt', "::", "{", if functor then functorType' nt else type' nt, "}" ]
205230
$$ nt' <+> sep (pr ":" (p, a) : map (pr "|") ls)
206231
where
207232
nt' = text (identCat nt)
208233
pr pre (p,a) = hsep [pre, text p, "{", text a , "}"]
209-
type' = catToType qualify $ if functor then "()" else empty
234+
type' = catToType qualify $ if functor then "(Maybe (Int, Int))" else empty
235+
functorType' nt = hsep ["(Maybe (Int, Int), ", type' nt, ")"]
210236
qualify
211237
| null absM = id
212238
| otherwise = ((text absM <> ".") <>)
213239

214240
-- Finally, some haskell code.
215241

216-
footer :: String
217-
footer = unlines $
242+
footer :: Bool -> CF -> String
243+
footer functor cf = unlines $
218244
[ "{"
219245
, ""
220246
, "happyError :: [" ++ tokenName ++ "] -> Either String a"
@@ -231,36 +257,65 @@ footer = unlines $
231257
]
232258
, ""
233259
, "myLexer = tokens"
260+
, if functor then render . vcat $ map mkParserFun (allEntryPoints cf) else ""
234261
, "}"
235262
]
263+
where
264+
mkParserFun cat =
265+
parserName cat <+> "=" <+> "(>>= return . snd)" <+> "." <+> parserName cat <> "_internal"
236266

237267
-- | GF literals.
238-
specialToks :: CF -> [String]
239-
specialToks cf = (`map` literals cf) $ \case
240-
"Ident" -> "L_Ident { PT _ (TV $$) }"
241-
"String" -> "L_quoted { PT _ (TL $$) }"
242-
"Integer" -> "L_integ { PT _ (TI $$) }"
243-
"Double" -> "L_doubl { PT _ (TD $$) }"
244-
"Char" -> "L_charac { PT _ (TC $$) }"
245-
own -> "L_" ++ own ++ " { PT _ (T_" ++ own ++ " " ++ posn ++ ") }"
246-
where posn = if isPositionCat cf own then "_" else "$$"
268+
specialToks :: CF -> Bool -> [String]
269+
specialToks cf functor = (`map` literals cf) $ \t -> case t of
270+
"Ident" -> "L_Ident { PT _ (TV " ++ posn t ++ ") }"
271+
"String" -> "L_quoted { PT _ (TL " ++ posn t ++ ") }"
272+
"Integer" -> "L_integ { PT _ (TI " ++ posn t ++ ") }"
273+
"Double" -> "L_doubl { PT _ (TD " ++ posn t ++ ") }"
274+
"Char" -> "L_charac { PT _ (TC " ++ posn t ++ ") }"
275+
own -> "L_" ++ own ++ " { PT _ (T_" ++ own ++ " " ++ posn own ++ ") }"
276+
where
277+
posn tokenCat = if isPositionCat cf tokenCat || functor then "_" else "$$"
247278

248-
specialRules :: ModuleName -> TokenText -> CF -> String
249-
specialRules absName tokenText cf = unlines . intersperse "" . (`map` literals cf) $ \case
279+
specialRules :: ModuleName -> Bool -> TokenText -> CF -> String
280+
specialRules absName functor tokenText cf = unlines . intersperse "" . (`map` literals cf) $ \t -> case t of
250281
-- "Ident" -> "Ident :: { Ident }"
251282
-- ++++ "Ident : L_ident { Ident $1 }"
252-
"String" -> "String :: { String }"
253-
++++ "String : L_quoted { " ++ stringUnpack "$1" ++ " }"
254-
"Integer" -> "Integer :: { Integer }"
255-
++++ "Integer : L_integ { (read (" ++ stringUnpack "$1" ++ ")) :: Integer }"
256-
"Double" -> "Double :: { Double }"
257-
++++ "Double : L_doubl { (read (" ++ stringUnpack "$1" ++ ")) :: Double }"
258-
"Char" -> "Char :: { Char }"
259-
++++ "Char : L_charac { (read (" ++ stringUnpack "$1" ++ ")) :: Char }"
260-
own -> own ++ " :: { " ++ qualify own ++ "}"
261-
++++ own ++ " : L_" ++ own ++ " { " ++ qualify own ++ posn ++ " }"
262-
where posn = if isPositionCat cf own then " (mkPosToken $1)" else " $1"
283+
"String" -> "String :: { " ++ mkTypePart t ++ " }"
284+
++++ "String : L_quoted { " ++ mkBodyPart t ++ " }"
285+
"Integer" -> "Integer :: { " ++ mkTypePart t ++ " }"
286+
++++ "Integer : L_integ { " ++ mkBodyPart t ++ " }"
287+
"Double" -> "Double :: { " ++ mkTypePart t ++ " }"
288+
++++ "Double : L_doubl { " ++ mkBodyPart t ++ " }"
289+
"Char" -> "Char :: { " ++ mkTypePart t ++ " }"
290+
++++ "Char : L_charac { " ++ mkBodyPart t ++ " }"
291+
own -> own ++ " :: { " ++ mkTypePart (qualify own) ++ " }"
292+
++++ own ++ " : L_" ++ own ++ " { " ++ mkBodyPart t ++ " }"
263293
where
294+
mkTypePart tokenCat = if functor then "(Maybe (Int, Int), " ++ tokenCat ++ ")" else tokenCat
295+
mkBodyPart tokenCat
296+
| null mkPosPart = mkValPart tokenCat
297+
| otherwise = "(" ++ mkPosPart ++ ", " ++ mkValPart tokenCat ++ ")"
298+
mkPosPart = if functor then "Just (tokenLineCol $1)" else ""
299+
mkValPart tokenCat =
300+
case tokenCat of
301+
"String" -> if functor then stringUnpack "(tokenText $1)"
302+
else stringUnpack "$1" -- String never has pos
303+
"Integer" -> if functor then "(read (" ++ stringUnpack "(tokenText $1)" ++ ")) :: Integer"
304+
else "(read (" ++ stringUnpack "$1" ++ ")) :: Integer" -- Integer never has pos
305+
"Double" -> if functor then "(read (" ++ stringUnpack "(tokenText $1)" ++ ")) :: Double"
306+
else "(read (" ++ stringUnpack "$1" ++ ")) :: Double" -- Double never has pos
307+
"Char" -> if functor then "(read (" ++ stringUnpack "(tokenText $1)" ++ ")) :: Char"
308+
else "(read (" ++ stringUnpack "$1" ++ ")) :: Char" -- Char never has pos
309+
own ->
310+
case functor of
311+
False ->
312+
case isPositionCat cf tokenCat of
313+
False -> qualify own ++ " $1"
314+
True -> qualify own ++ " (mkPosToken $1)"
315+
True ->
316+
case isPositionCat cf tokenCat of
317+
False -> qualify own ++ " (tokenText $1)"
318+
True -> qualify own ++ " (mkPosToken $1)"
264319
stringUnpack = tokenTextUnpack tokenText
265320
qualify
266321
| null absName = id

0 commit comments

Comments
 (0)