parsinghaskellparsec

How to parse expression with implicit multiplication with Parsec in Haskell


I have a grammar which allows implicit multiplication, (1+2)(3+4) is the same as (1+2)*(3+4) or (1+2)7 is the same as (1+2)*7 How do I implement this in Haskell? Here is what I have so far:

import Control.Monad
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token

languageDef =
  emptyDef { Token.identStart      = letter
           , Token.identLetter     = alphaNum
           , Token.reservedOpNames = ["+", "*"]
           }

lexer = Token.makeTokenParser languageDef

reservedOp = Token.reservedOp lexer
parens     = Token.parens     lexer
integer    = Token.integer    lexer

data Expr = Const Int
          | Binary BinOp Expr Expr
            deriving (Show)

data BinOp = Add | Multiply
             deriving (Show)

expression = buildExpressionParser operators term

operators = [ [Infix  (reservedOp "*"   >> return (Binary Multiply)) AssocLeft]
            , [Infix  (reservedOp "+"   >> return (Binary Add     )) AssocLeft]
            ]

term =   liftM (Const . fromIntegral) integer
     <|> parens expression
     <|> (do e1 <- expression
             e2 <- term
             return $ Binary Multiply e1 e2)

parseString str =
  case parse expression "" str of
    Left e  -> error $ show e
    Right r -> r

but it doesn't work, I have an error while parsing, when I try to parse ((1 + 5) 8) I have unexpected "8" expecting operator or ")"


Solution

  • I can't show a solution with parsec, but I have one for megaparsec. The general idea is based on an answer for a similar question where OP was using FParsec (F#).

    The idea is to split the expression parser into two where one handles all operators with higher precedence than the implicit operator and the other handles the rest.

    Utilizing the megaparsec expression parser (module Control.Monad.Combinators.Expr, package parser-combinators) it can be achieved like so:

    type Parser = Parsec Void String
    
    makeExprParser' :: (Parser a -> Parser a)
                    -> [[Operator Parser a]]
                    -> [[Operator Parser a]]
                    -> (a -> a -> a)
                    -> Parser a
    makeExprParser' termf hiOps loOps implicitf = lo
      where hi = makeExprParser hiTerm hiOps
            lo = makeExprParser loTerm loOps
            hiTerm = termf lo
            loTerm = some hi <&> foldr1 implicitf
    

    Below is the full code where I used it to parse simple regular expressions (the implicit op being concatenation):

    module MyGrep.Parser (parseRegex) where
    
    import Control.Monad
    import Control.Monad.Combinators.Expr
    import Data.Bifunctor
    import Data.Functor ((<&>), ($>))
    import Data.List (intersperse)
    import Data.Maybe
    import Data.Void (Void)
    import MyGrep.NFA.Base qualified as NFA
    import MyGrep.NFA.Build qualified as NFA
    import MyGrep.Util (sortPair)
    import Text.Megaparsec
    import Text.Megaparsec.Char
    
    type Parser = Parsec Void String
    
    parseRegex :: String -> Either String NFA.StateB
    parseRegex = first errorBundlePretty . runParser regex' ""
    
    regex' :: Parser NFA.StateB
    regex' = do
      start <- optStartAnchor
      inner <- regex
      end <- optEndAnchor <* eof
      return $ start <> inner <> end
    
    optStartAnchor :: Parser NFA.StateB
    optStartAnchor = optional (char '^') <&> maybe NFA.anyString (const mempty)
    
    optEndAnchor :: Parser NFA.StateB
    optEndAnchor   = optional (char '$') <&> maybe NFA.anyString (const mempty)
    
    hiOpTbl :: [[Operator Parser NFA.StateB]]
    hiOpTbl = [[Postfix (char '*' $> NFA.zeroOrMore),
                Postfix (char '+' $> NFA.oneOrMore),
                Postfix (char '?' $> NFA.zeroOrOne)]]
    
    loOpTbl :: [[Operator Parser NFA.StateB]]
    loOpTbl = [[InfixL  (char '|' $> NFA.alternation)]]
    
    implicitOp :: NFA.StateB -> NFA.StateB -> NFA.StateB
    implicitOp = (<>)
    
    regex :: Parser NFA.StateB
    regex = makeExprParser' term hiOpTbl loOpTbl implicitOp
    
    term :: Parser NFA.StateB -> Parser NFA.StateB
    term term' = choice [
      group term'    <&> fromMaybe mempty,
      wordCharClass   $> NFA.oneOf [NFA.charRange ('0', '9'),
                                    NFA.charRange ('A', 'Z'),
                                    NFA.charRange ('a', 'z'),
                                    NFA.literalChar '_'],
      digitCharClass  $> NFA.charRange ('0', '9'),
      negCharClass   <&> NFA.noneOf,
      posCharClass   <&> NFA.oneOf,
      wildcard        $> NFA.anyChar,
      litOrEscChar   <&> NFA.literalChar]
    
    group :: Parser NFA.StateB -> Parser (Maybe NFA.StateB)
    group term = between (char '(') (char ')') (optional term) <?> "match group"
    
    digitCharClass :: Parser ()
    digitCharClass = () <$ string "\\d" <?> "digit character class"
    
    wordCharClass :: Parser ()
    wordCharClass = () <$ string "\\w" <?> "word character class"
    
    negCharClass :: Parser [NFA.CharMatch]
    negCharClass = charClass False NFA.LiteralChar (NFA.CharRange . sortPair) <?> "negative character class"
    
    posCharClass :: Parser [NFA.StateB]
    posCharClass = charClass True NFA.literalChar NFA.charRange <?> "positive character class"
    
    charClass :: Bool -> (Char -> a) -> ((Char, Char) -> a) -> Parser [a]
    charClass positive litf rangef = between open (char ']') (some singleOrRange)
      where open = if positive then string "[" else string "[^"
            singleOrRange = choice [singleChar <&> litf,
                                    charRange  <&> rangef]
            singleChar = try $ litOrEscChar <* notFollowedBy (char '-')
            charRange = (,) <$> litOrEscChar <* char '-' <*> litOrEscChar <?> "character range"
            litOrEscChar = charWithReserved "^$\\[]-"
    
    wildcard :: Parser ()
    wildcard = () <$ char '.' <?> "wildcard"
    
    litOrEscChar :: Parser Char
    litOrEscChar = charWithReserved "^$\\|*+?()[]"
    
    charWithReserved :: [Char] -> Parser Char
    charWithReserved res = escChar <|> litChar
      where litChar = noneOf res <?> "character literal"
            escChar = char '\\' *> resChar <?> "escape sequence"
            resChar = oneOf res <?> resLbl
            resLbl  = pprintChars res
    
    pprintChars :: [Char] -> String
    pprintChars chars = (mconcat . intersperse ", " . init) quoted ++ ", or " ++ last quoted
      where quoted = map (\c -> ['\'', c, '\'']) chars
    
    makeExprParser' :: (Parser a -> Parser a)
                    -> [[Operator Parser a]]
                    -> [[Operator Parser a]]
                    -> (a -> a -> a)
                    -> Parser a
    makeExprParser' termf hiOps loOps implicitf = lo
      where hi = makeExprParser hiTerm hiOps
            lo = makeExprParser loTerm loOps
            hiTerm = termf lo
            loTerm = some hi <&> foldr1 implicitf