Skip to content

Commit f825515

Browse files
committed
[ WIP #349 ] unify C and C++ lexer/parser: CPP/STL part
1 parent da179d6 commit f825515

File tree

4 files changed

+184
-133
lines changed

4 files changed

+184
-133
lines changed

source/BNFC.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ library
131131
FlexibleContexts
132132
FlexibleInstances
133133
LambdaCase
134+
MultiWayIf
134135
NamedFieldPuns
135136
OverloadedStrings
136137
PatternGuards

source/src/BNFC/Backend/C/CFtoBisonC.hs

Lines changed: 152 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,20 @@ module BNFC.Backend.C.CFtoBisonC
2626

2727
import Prelude hiding ((<>))
2828

29-
import Data.Char (toLower)
30-
import Data.Foldable (toList)
31-
import Data.List (intercalate, nub)
32-
import Data.Maybe (fromMaybe)
29+
import Data.Char ( toLower, isUpper )
30+
import Data.Foldable ( toList )
31+
import Data.List ( intercalate, nub )
32+
import Data.Maybe ( fromMaybe )
3333
import qualified Data.Map as Map
34-
import System.FilePath ((<.>))
34+
import System.FilePath ( (<.>) )
3535

3636
import BNFC.CF
3737
import BNFC.Backend.Common.NamedVariables hiding (varName)
38-
import BNFC.Backend.C.CFtoFlexC (ParserMode(..), cParser, parserHExt, parserName, parserPackage)
38+
import BNFC.Backend.C.CFtoFlexC (ParserMode(..), cParser, stlParser, parserHExt, parserName, parserPackage)
3939
import BNFC.Backend.CPP.STL.STLUtils
40-
import BNFC.Options (RecordPositions(..))
40+
import BNFC.Options (RecordPositions(..), InPackage)
4141
import BNFC.PrettyPrint
42-
import BNFC.Utils ((+++), whenJust)
42+
import BNFC.Utils ((+++), for, unless, when, whenJust)
4343

4444
--This follows the basic structure of CFtoHappy.
4545

@@ -53,11 +53,11 @@ type MetaVar = String
5353
cf2Bison :: RecordPositions -> ParserMode -> CF -> SymMap -> String
5454
cf2Bison rp mode cf env = unlines
5555
[ header mode cf
56-
, render $ union mode $ allParserCatsNorm cf
56+
, render $ union mode $ posCats ++ allParserCatsNorm cf
5757
, unionDependentCode mode
5858
, "%token _ERROR_"
5959
, tokens (map fst $ tokenPragmas cf) env
60-
, declarations cf
60+
, declarations mode cf
6161
, specialToks cf
6262
, startSymbol cf
6363
, ""
@@ -66,11 +66,19 @@ cf2Bison rp mode cf env = unlines
6666
, prRules $ rulesForBison rp mode cf env
6767
, "%%"
6868
, ""
69+
, nsStart inPackage
6970
, entryCode mode cf
71+
, nsEnd inPackage
7072
]
7173
where
72-
name = fromMaybe undefined $ parserName mode
74+
name = parserName mode
75+
inPackage = parserPackage mode
76+
posCats
77+
| stlParser mode = map TokenCat $ positionCats cf
78+
| otherwise = []
7379

80+
positionCats :: CF -> [String]
81+
positionCats cf = [ wpThing name | TokenReg name True _ <- cfgPragmas cf ]
7482

7583
header :: ParserMode -> CF -> String
7684
header mode cf = unlines $ concat
@@ -80,8 +88,8 @@ header mode cf = unlines $ concat
8088
, "%defines \"" ++ ("Bison" <.> h) ++ "\""
8189
]
8290
, whenJust (parserPackage mode) $ \ ns ->
83-
[ "%name-prefix = \"" ++ ns ++ "yy\""
84-
, " /* From Bison 2.6: %define api.prefix {" ++ ns ++ "yy} */"
91+
[ "%name-prefix = \"" ++ ns ++ "\""
92+
, " /* From Bison 2.6: %define api.prefix {" ++ ns ++ "} */"
8593
]
8694
, [ ""
8795
, "/* Reentrant parser */"
@@ -125,9 +133,12 @@ header mode cf = unlines $ concat
125133
, ""
126134
, "extern yyscan_t " ++ name ++ "_initialize_lexer(FILE * inp);"
127135
, ""
128-
, "/* List reversal functions. */"
136+
]
137+
, unless (stlParser mode)
138+
[ "/* List reversal functions. */"
129139
, concatMap (reverseList mode) $ filter isList $ allParserCatsNorm cf
130-
, "/* End C preamble code */"
140+
]
141+
, [ "/* End C preamble code */"
131142
, "%}"
132143
]
133144
]
@@ -138,7 +149,7 @@ header mode cf = unlines $ concat
138149
-- Found old comment:
139150
-- -- M.F. 2004-09-17 changed allEntryPoints to allCatsIdNorm. Seems to fix the [Ty2] bug.
140151
h = parserHExt mode
141-
name = fromMaybe undefined $ parserName mode
152+
name = parserName mode
142153

143154
-- | Code that needs the @YYSTYPE@ defined by the @%union@ pragma.
144155
--
@@ -152,7 +163,7 @@ unionDependentCode mode = unlines
152163
, "%}"
153164
]
154165
where
155-
name = fromMaybe undefined $ parserName mode -- TODO
166+
name = parserName mode
156167

157168
errorHandler :: String -> String
158169
errorHandler name = unlines
@@ -185,11 +196,11 @@ parseMethod mode cf cat = unlines $ concat
185196
, body True
186197
]
187198
where
188-
name = fromMaybe undefined $ parserName mode
199+
name = parserName mode
189200
body stringParser = concat
190201
[ [ "{"
191202
, " YYSTYPE result;"
192-
, " yyscan_t scanner = " ++ name ++ "_initialize_lexer(", file, ");"
203+
, " yyscan_t scanner = " ++ name ++ "_initialize_lexer(" ++ file ++ ");"
193204
, " if (!scanner) {"
194205
, " fprintf(stderr, \"Failed to initialize lexer.\\n\");"
195206
, " return 0;"
@@ -205,21 +216,32 @@ parseMethod mode cf cat = unlines $ concat
205216
, " }"
206217
, " else"
207218
, " { /* Success */"
208-
, " return" +++ res ++ ";"
219+
]
220+
, revOpt
221+
, [ " return" +++ res ++ ";"
209222
, " }"
210223
, "}"
211224
]
212225
]
213226
where
214227
file | stringParser = "0"
215228
| otherwise = "inp"
229+
stl = stlParser mode
216230
ncat = normCat cat
217231
dat0 = identCat ncat
218232
dat = if cParser mode then dat0 else dat0 ++ "*"
219233
parser = identCat cat
220234
res0 = concat [ "result.", varName ncat ]
221-
revRes = "reverse" ++ dat0 ++ "(" ++ res0 ++ ")"
222-
res = if cat `elem` cfgReversibleCats cf then revRes else res0
235+
-- Reversing the result
236+
isReversible = cat `elem` cfgReversibleCats cf
237+
-- C and NoSTL
238+
res
239+
| not stl, isReversible
240+
= "reverse" ++ dat0 ++ "(" ++ res0 ++ ")"
241+
| otherwise = res0
242+
-- STL: Vectors are snoc lists
243+
revOpt = when (stl && isList cat && not isReversible)
244+
[ "std::reverse(" ++ res ++ "->begin(), " ++ res ++"->end());" ]
223245

224246
--This method generates list reversal functions for each list type.
225247
reverseList :: ParserMode -> Cat -> String
@@ -248,7 +270,7 @@ reverseList mode c0 = unlines
248270
-- yylval. For efficiency, we may want to only include used categories here.
249271
--
250272
-- >>> let foo = Cat "Foo"
251-
-- >>> union Nothing [foo, ListCat foo]
273+
-- >>> union (CParser True "") [foo, ListCat foo]
252274
-- %union
253275
-- {
254276
-- int _int;
@@ -265,7 +287,7 @@ reverseList mode c0 = unlines
265287
-- ListFoo* listfoo_;
266288
--
267289
-- >>> let foo2 = CoercCat "Foo" 2
268-
-- >>> union (CppParser Nothing) [foo, ListCat foo, foo2, ListCat foo2]
290+
-- >>> union (CppParser Nothing "") [foo, ListCat foo, foo2, ListCat foo2]
269291
-- %union
270292
-- {
271293
-- int _int;
@@ -315,12 +337,16 @@ unionBuiltinTokens =
315337
, "char* _string;"
316338
]
317339

318-
--declares non-terminal types.
319-
declarations :: CF -> String
320-
declarations cf = concatMap (typeNT cf) (allParserCats cf)
321-
where --don't define internal rules
322-
typeNT cf nt | rulesForCat cf nt /= [] = "%type <" ++ varName (normCat nt) ++ "> " ++ identCat nt ++ "\n"
323-
typeNT _ _ = ""
340+
-- | @%type@ declarations for non-terminal types.
341+
declarations :: ParserMode -> CF -> String
342+
declarations mode cf = unlines $ map typeNT $
343+
posCats ++
344+
filter (not . null . rulesForCat cf) (allParserCats cf) -- don't define internal rules
345+
where
346+
typeNT nt = "%type <" ++ varName nt ++ "> " ++ identCat nt
347+
posCats
348+
| stlParser mode = map TokenCat $ positionCats cf
349+
| otherwise = []
324350

325351
--declares terminal types.
326352
-- token name "literal"
@@ -359,8 +385,17 @@ startSymbol cf = "%start" +++ identCat (firstEntry cf)
359385
--The following functions are a (relatively) straightforward translation
360386
--of the ones in CFtoHappy.hs
361387
rulesForBison :: RecordPositions -> ParserMode -> CF -> SymMap -> Rules
362-
rulesForBison rp mode cf env = map mkOne $ ruleGroups cf where
388+
rulesForBison rp mode cf env = map mkOne (ruleGroups cf) ++ posRules
389+
where
363390
mkOne (cat,rules) = constructRule rp mode cf env rules cat
391+
posRules :: Rules
392+
posRules
393+
| CppParser inPackage _ <- mode = for (positionCats cf) $ \ n -> (TokenCat n,
394+
[( Map.findWithDefault n (Tokentype n) env
395+
, addResult cf (TokenCat n) $ concat
396+
[ "$$ = new ", nsScope inPackage, n, "($1, @$.first_line);" ]
397+
)])
398+
| otherwise = []
364399

365400
-- For every non-terminal, we construct a set of rules.
366401
constructRule
@@ -369,65 +404,108 @@ constructRule
369404
-> NonTerminal -- ^ ... this non-terminal.
370405
-> (NonTerminal,[(Pattern,Action)])
371406
constructRule rp mode cf env rules nt = (nt,) $
372-
[ (p,) $ addResult $ generateAction rp mode (identCat (normCat nt)) (funRule r) b m
407+
[ (p,) $ addResult cf nt $ generateAction rp mode (identCat (normCat nt)) (funRule r) b m
373408
| r0 <- rules
374409
, let (b,r) = if isConsFun (funRule r0) && valCat r0 `elem` cfgReversibleCats cf
375410
then (True, revSepListRule r0)
376411
else (False, r0)
377-
, let (p,m) = generatePatterns cf env r
412+
, let (p,m) = generatePatterns mode cf env r
378413
]
379-
where
380-
-- Add action if we parse an entrypoint non-terminal:
381-
-- Set field in result record to current parse.
382-
addResult a =
383-
if nt `elem` toList (allEntryPoints cf)
384-
-- Note: Bison has only a single entrypoint,
385-
-- but BNFC works around this by adding dedicated parse methods for all entrypoints.
386-
-- Andreas, 2021-03-24: But see #350: bison still uses only the @%start@ non-terminal.
387-
then concat [ a, " result->", varName (normCat nt), " = $$;" ]
388-
else a
414+
415+
-- | Add action if we parse an entrypoint non-terminal:
416+
-- Set field in result record to current parse.
417+
addResult :: CF -> NonTerminal -> Action -> Action
418+
addResult cf nt a =
419+
if nt `elem` toList (allEntryPoints cf)
420+
-- Note: Bison has only a single entrypoint,
421+
-- but BNFC works around this by adding dedicated parse methods for all entrypoints.
422+
-- Andreas, 2021-03-24: But see #350: bison still uses only the @%start@ non-terminal.
423+
then concat [ a, " result->", varName (normCat nt), " = $$;" ]
424+
else a
425+
426+
-- | Switch between STL or not.
427+
generateAction :: IsFun a => RecordPositions -> ParserMode -> String -> a -> Bool -> [(MetaVar, Bool)] -> Action
428+
generateAction rp = \case
429+
CppParser ns _ -> generateActionSTL rp ns
430+
CParser b _ -> \ nt f r -> generateActionC rp (not b) nt f r . map fst
389431

390432
-- | Generates a string containing the semantic action.
391-
-- >>> generateAction NoRecordPositions (CParser False "") "Foo" "Bar" False ["$1"]
392-
-- "make_Bar($1);"
393-
-- >>> generateAction NoRecordPositions (CParser False "") "Foo" "_" False ["$1"]
394-
-- "$1;"
395-
-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "[]" False []
396-
-- "0;"
397-
-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:[])" False ["$1"]
398-
-- "make_ListFoo($1, 0);"
399-
-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:)" False ["$1","$2"]
400-
-- "make_ListFoo($1, $2);"
401-
-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:)" True ["$1","$2"]
402-
-- "make_ListFoo($2, $1);"
403-
generateAction :: IsFun a => RecordPositions -> ParserMode -> String -> a -> Bool -> [MetaVar] -> Action
404-
generateAction rp mode nt f b ms
405-
| isCoercion f = unwords ms ++ ";" ++ loc
406-
| isNilFun f = "0;"
407-
| isOneFun f = concat [new, nt, "(", intercalate ", " ms', ", 0);"]
408-
| isConsFun f = concat [new, nt, "(", intercalate ", " ms', ");"]
409-
| otherwise = concat [new, funName f, "(", intercalate ", " ms', ");", loc]
433+
-- >>> generateActionC NoRecordPositions False "Foo" "Bar" False ["$1"]
434+
-- "$$ = new Bar($1);"
435+
-- >>> generateActionC NoRecordPositions True "Foo" "Bar" False ["$1"]
436+
-- "$$ = make_Bar($1);"
437+
-- >>> generateActionC NoRecordPositions True "Foo" "_" False ["$1"]
438+
-- "$$ = $1;"
439+
-- >>> generateActionC NoRecordPositions True "ListFoo" "[]" False []
440+
-- "$$ = 0;"
441+
-- >>> generateActionC NoRecordPositions True "ListFoo" "(:[])" False ["$1"]
442+
-- "$$ = make_ListFoo($1, 0);"
443+
-- >>> generateActionC NoRecordPositions True "ListFoo" "(:)" False ["$1","$2"]
444+
-- "$$ = make_ListFoo($1, $2);"
445+
-- >>> generateActionC NoRecordPositions True "ListFoo" "(:)" True ["$1","$2"]
446+
-- "$$ = make_ListFoo($2, $1);"
447+
generateActionC :: IsFun a => RecordPositions -> Bool -> String -> a -> Bool -> [MetaVar] -> Action
448+
generateActionC rp cParser nt f b ms
449+
| isCoercion f = "$$ = " ++ unwords ms ++ ";" ++ loc
450+
| isNilFun f = "$$ = 0;"
451+
| isOneFun f = concat ["$$ = ", new nt, "(", intercalate ", " ms', ", 0);"]
452+
| isConsFun f = concat ["$$ = ", new nt, "(", intercalate ", " ms', ");"]
453+
| otherwise = concat ["$$ = ", new (funName f), "(", intercalate ", " ms', ");", loc]
410454
where
411455
ms' = if b then reverse ms else ms
412-
loc = if rp == RecordPositions then " $$->line_number = @$.first_line; $$->char_number = @$.first_column;" else ""
413-
new = if cParser mode then "make_" else "new "
456+
loc | RecordPositions <- rp
457+
= " $$->line_number = @$.first_line; $$->char_number = @$.first_column;"
458+
| otherwise
459+
= ""
460+
new :: String -> String
461+
new | cParser = ("make_" ++)
462+
| otherwise = \ s -> if isUpper (head s) then "new " ++ s else s
463+
464+
generateActionSTL :: IsFun a => RecordPositions -> InPackage -> String -> a -> Bool -> [(MetaVar,Bool)] -> Action
465+
generateActionSTL rp inPackage nt f b mbs = reverses ++
466+
if | isCoercion f -> concat ["$$ = ", unwords ms, ";", loc]
467+
| isNilFun f -> concat ["$$ = ", "new ", scope, nt, "();"]
468+
| isOneFun f -> concat ["$$ = ", "new ", scope, nt, "(); $$->push_back($1);"]
469+
| isConsFun f, b -> "$1->push_back("++ lastms ++ "); $$ = $1;"
470+
| isConsFun f -> lastms ++ "->push_back(" ++ head ms ++ "); $$ = " ++ lastms ++ ";" ---- not left rec
471+
| isDefinedRule f -> concat ["$$ = ", scope, funName f, "(", intercalate ", " ms, ");" ]
472+
| otherwise -> concat ["$$ = ", "new ", scope, funName f, "(", intercalate ", " ms, ");", loc]
473+
where
474+
ms = map fst mbs
475+
lastms = last ms
476+
loc | RecordPositions <- rp
477+
= " $$->line_number = @$.first_line; $$->char_number = @$.first_column;"
478+
| otherwise
479+
= ""
480+
reverses = unwords ["std::reverse(" ++ m ++"->begin(),"++m++"->end()) ;" | (m, True) <- mbs]
481+
scope = nsScope inPackage
414482

415483
-- Generate patterns and a set of metavariables indicating
416484
-- where in the pattern the non-terminal
417-
generatePatterns :: CF -> SymMap -> Rule -> (Pattern,[MetaVar])
418-
generatePatterns cf env r = case rhsRule r of
485+
generatePatterns :: ParserMode -> CF -> SymMap -> Rule -> (Pattern,[(MetaVar,Bool)])
486+
generatePatterns mode cf env r = case rhsRule r of
419487
[] -> ("/* empty */",[])
420488
its -> (unwords (map mkIt its), metas its)
421489
where
422-
mkIt i = case i of
423-
Left (TokenCat s) -> fromMaybe (typeName s) $ Map.lookup (Tokentype s) env
490+
stl = stlParser mode
491+
mkIt = \case
492+
Left (TokenCat s)
493+
| stl && isPositionCat cf s
494+
-> typeName s
495+
| otherwise -> Map.findWithDefault (typeName s) (Tokentype s) env
424496
Left c -> identCat c
425-
Right s -> fromMaybe s $ Map.lookup (Keyword s) env
426-
metas its = [revIf c ('$': show i) | (i,Left c) <- zip [1 :: Int ..] its]
427-
revIf c m = if not (isConsFun (funRule r)) && elem c revs
497+
Right s -> Map.findWithDefault s (Keyword s) env
498+
metas its = [(revIf c ('$': show i), revert c) | (i, Left c) <- zip [1 :: Int ..] its]
499+
-- C and C++/NoSTL: call reverse function
500+
revIf c m = if not stl && isntCons && elem c revs
428501
then "reverse" ++ identCat (normCat c) ++ "(" ++ m ++ ")"
429502
else m -- no reversal in the left-recursive Cons rule itself
430-
revs = cfgReversibleCats cf
503+
-- C++/STL: flag if reversal is necessary
504+
-- notice: reversibility with push_back vectors is the opposite
505+
-- of right-recursive lists!
506+
revert c = isntCons && isList c && notElem c revs
507+
revs = cfgReversibleCats cf
508+
isntCons = not $ isConsFun $ funRule r
431509

432510
-- We have now constructed the patterns and actions,
433511
-- so the only thing left is to merge them into one string.
@@ -436,11 +514,11 @@ prRules :: Rules -> String
436514
prRules [] = []
437515
prRules ((_, []):rs) = prRules rs --internal rule
438516
prRules ((nt, (p,a) : ls):rs) =
439-
unwords [nt', ":" , p, "{ $$ =", a, "}", '\n' : pr ls] ++ ";\n" ++ prRules rs
517+
unwords [nt', ":" , p, "{", a, "}", '\n' : pr ls] ++ ";\n" ++ prRules rs
440518
where
441519
nt' = identCat nt
442520
pr [] = []
443-
pr ((p,a):ls) = unlines [unwords [" |", p, "{ $$ =", a , "}"]] ++ pr ls
521+
pr ((p,a):ls) = unlines [unwords [" |", p, "{", a , "}"]] ++ pr ls
444522

445523
--Some helper functions.
446524
resultName :: String -> String

0 commit comments

Comments
 (0)