I have a grammar which allows implicit multiplication, (1+2)(3+4)
is the same as (1+2)*(3+4)
or (1+2)7
is the same as (1+2)*7
How do I implement this in Haskell? Here is what I have so far:
import Control.Monad
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token
languageDef =
emptyDef { Token.identStart = letter
, Token.identLetter = alphaNum
, Token.reservedOpNames = ["+", "*"]
}
lexer = Token.makeTokenParser languageDef
reservedOp = Token.reservedOp lexer
parens = Token.parens lexer
integer = Token.integer lexer
data Expr = Const Int
| Binary BinOp Expr Expr
deriving (Show)
data BinOp = Add | Multiply
deriving (Show)
expression = buildExpressionParser operators term
operators = [ [Infix (reservedOp "*" >> return (Binary Multiply)) AssocLeft]
, [Infix (reservedOp "+" >> return (Binary Add )) AssocLeft]
]
term = liftM (Const . fromIntegral) integer
<|> parens expression
<|> (do e1 <- expression
e2 <- term
return $ Binary Multiply e1 e2)
parseString str =
case parse expression "" str of
Left e -> error $ show e
Right r -> r
but it doesn't work, I have an error while parsing, when I try to parse ((1 + 5) 8)
I have unexpected "8" expecting operator or ")"
I can't show a solution with parsec
, but I have one for megaparsec
. The general idea is based on an answer for a similar question where OP was using FParsec (F#).
The idea is to split the expression parser into two where one handles all operators with higher precedence than the implicit operator and the other handles the rest.
Utilizing the megaparsec expression parser (module Control.Monad.Combinators.Expr
, package parser-combinators
) it can be achieved like so:
type Parser = Parsec Void String
makeExprParser' :: (Parser a -> Parser a)
-> [[Operator Parser a]]
-> [[Operator Parser a]]
-> (a -> a -> a)
-> Parser a
makeExprParser' termf hiOps loOps implicitf = lo
where hi = makeExprParser hiTerm hiOps
lo = makeExprParser loTerm loOps
hiTerm = termf lo
loTerm = some hi <&> foldr1 implicitf
Below is the full code where I used it to parse simple regular expressions (the implicit op being concatenation):
module MyGrep.Parser (parseRegex) where
import Control.Monad
import Control.Monad.Combinators.Expr
import Data.Bifunctor
import Data.Functor ((<&>), ($>))
import Data.List (intersperse)
import Data.Maybe
import Data.Void (Void)
import MyGrep.NFA.Base qualified as NFA
import MyGrep.NFA.Build qualified as NFA
import MyGrep.Util (sortPair)
import Text.Megaparsec
import Text.Megaparsec.Char
type Parser = Parsec Void String
parseRegex :: String -> Either String NFA.StateB
parseRegex = first errorBundlePretty . runParser regex' ""
regex' :: Parser NFA.StateB
regex' = do
start <- optStartAnchor
inner <- regex
end <- optEndAnchor <* eof
return $ start <> inner <> end
optStartAnchor :: Parser NFA.StateB
optStartAnchor = optional (char '^') <&> maybe NFA.anyString (const mempty)
optEndAnchor :: Parser NFA.StateB
optEndAnchor = optional (char '$') <&> maybe NFA.anyString (const mempty)
hiOpTbl :: [[Operator Parser NFA.StateB]]
hiOpTbl = [[Postfix (char '*' $> NFA.zeroOrMore),
Postfix (char '+' $> NFA.oneOrMore),
Postfix (char '?' $> NFA.zeroOrOne)]]
loOpTbl :: [[Operator Parser NFA.StateB]]
loOpTbl = [[InfixL (char '|' $> NFA.alternation)]]
implicitOp :: NFA.StateB -> NFA.StateB -> NFA.StateB
implicitOp = (<>)
regex :: Parser NFA.StateB
regex = makeExprParser' term hiOpTbl loOpTbl implicitOp
term :: Parser NFA.StateB -> Parser NFA.StateB
term term' = choice [
group term' <&> fromMaybe mempty,
wordCharClass $> NFA.oneOf [NFA.charRange ('0', '9'),
NFA.charRange ('A', 'Z'),
NFA.charRange ('a', 'z'),
NFA.literalChar '_'],
digitCharClass $> NFA.charRange ('0', '9'),
negCharClass <&> NFA.noneOf,
posCharClass <&> NFA.oneOf,
wildcard $> NFA.anyChar,
litOrEscChar <&> NFA.literalChar]
group :: Parser NFA.StateB -> Parser (Maybe NFA.StateB)
group term = between (char '(') (char ')') (optional term) <?> "match group"
digitCharClass :: Parser ()
digitCharClass = () <$ string "\\d" <?> "digit character class"
wordCharClass :: Parser ()
wordCharClass = () <$ string "\\w" <?> "word character class"
negCharClass :: Parser [NFA.CharMatch]
negCharClass = charClass False NFA.LiteralChar (NFA.CharRange . sortPair) <?> "negative character class"
posCharClass :: Parser [NFA.StateB]
posCharClass = charClass True NFA.literalChar NFA.charRange <?> "positive character class"
charClass :: Bool -> (Char -> a) -> ((Char, Char) -> a) -> Parser [a]
charClass positive litf rangef = between open (char ']') (some singleOrRange)
where open = if positive then string "[" else string "[^"
singleOrRange = choice [singleChar <&> litf,
charRange <&> rangef]
singleChar = try $ litOrEscChar <* notFollowedBy (char '-')
charRange = (,) <$> litOrEscChar <* char '-' <*> litOrEscChar <?> "character range"
litOrEscChar = charWithReserved "^$\\[]-"
wildcard :: Parser ()
wildcard = () <$ char '.' <?> "wildcard"
litOrEscChar :: Parser Char
litOrEscChar = charWithReserved "^$\\|*+?()[]"
charWithReserved :: [Char] -> Parser Char
charWithReserved res = escChar <|> litChar
where litChar = noneOf res <?> "character literal"
escChar = char '\\' *> resChar <?> "escape sequence"
resChar = oneOf res <?> resLbl
resLbl = pprintChars res
pprintChars :: [Char] -> String
pprintChars chars = (mconcat . intersperse ", " . init) quoted ++ ", or " ++ last quoted
where quoted = map (\c -> ['\'', c, '\'']) chars
makeExprParser' :: (Parser a -> Parser a)
-> [[Operator Parser a]]
-> [[Operator Parser a]]
-> (a -> a -> a)
-> Parser a
makeExprParser' termf hiOps loOps implicitf = lo
where hi = makeExprParser hiTerm hiOps
lo = makeExprParser loTerm loOps
hiTerm = termf lo
loTerm = some hi <&> foldr1 implicitf