haskellmegaparsec

Haskell Megaparsec: how to show a traceback of all parsers that led up to an error?


Here's my toy file:

import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Void (Void)

type Parser = Parsec Void String

myParser :: Parser String
myParser = do
            d <- digitChar
            c <- letterChar
            return $ replicate (read [d]) c

Now from ghci, if I type parseTest (myParser <?> "foo") "3a" I get "aaa" as expected, but if I type parseTest (myParser <?> "foo") "33a" then I get:

1:2:
  |
1 | 33a
  |  ^
unexpected '3'
expecting letter

The error message makes sense in this simple case (I had to enter a letter instead of another digit) but when writing more complicated parsers, letterChar may appear in any number of composite parsers, so it's not clear which letterChar was the one that failed. Since I passed in a label foo for my parser, I would like it if the error message instead said something like the following:

1:2:
  |
1 | 33a
  |  ^
error while parsing foo:
  unexpected '3'
  expecting letter

And more generally, as long as I give my parsers labels using <?>, I would like the whole traceback of errors to be shown, like:

error while parsing grandparent:
  error while parsing parent:
    unexpected '3'
    expecting letter

Is there a way to do this in Megaparsec?


Solution

  • Megaparsec has no built in support for doing this, but you can use its custom error machinery.

    We can define a custom error type that adds a context label to an existing ParseError, together with a ShowErrorComponent instance to display it within the error message. (The weird orphan Ord instance for ParseError here satisfies a technical requirement. Custom errors need an Ord instance, but ParseError doesn't have one, so we have to derive one if we want to include a nested ParseError in our custom error.)

    data ErrorWithLabel = ErrorWithLabel String (ParseError String ErrorWithLabel)
      deriving (Eq, Ord)
    
    -- orphan instance needed for technical reasons
    deriving instance Ord (ParseError String ErrorWithLabel)
    
    instance ShowErrorComponent ErrorWithLabel where
      showErrorComponent (ErrorWithLabel l e) =
        "while parsing " <> l <> ",\n" <> parseErrorTextPretty e
    

    By itself, this doesn't do anything, but we can modify the definitions of <?> and its non-operator equivalent label to make use of this custom error. Specifically, we can modify them so they call the original Megaparsec definition of label which properly handles the case where a parser fails without consuming input (by presenting the label as the "lowermost" error) and then also handle the case where a parser fails after consuming input (by wrapping the error with an ErrorWithLabel context):

    import Text.Megaparsec hiding (label, (<?>))
    import qualified Text.Megaparsec as P
    import Text.Megaparsec.Internal (ParsecT(..))
    import qualified Data.Set as Set
    
    label :: String -> Parser p -> Parser p
    label l p = ParsecT $ \s cok cerr eeok eerr ->
      let addLabel e = FancyError (errorOffset e) .
            Set.singleton . ErrorCustom $ ErrorWithLabel l e
      in unParser (P.label l p) s cok (cerr . addLabel) eeok eerr
    
    infix 0 <?>
    (<?>) :: Parser p -> String -> Parser p
    (<?>) = flip label
    

    This works fine for your example:

    λ> parseTest (myParser <?> "foo") "33a"
    1:2:                                                                                                     
      |                                                                                                      
    1 | 33a                                                                                                  
      |  ^                                                                                                   
    while parsing foo,                                                                                       
    unexpected '3'                                                                                           
    expecting letter
    
    λ> parseTest ((myParser <?> "parent") <?> "grandparent") "33a"
    1:2:                                                                                                     
      |                                                                                                      
    1 | 33a                                                                                                  
      |  ^                                                                                                   
    while parsing grandparent,                                                                               
    while parsing parent,                                                                                    
    unexpected '3'                                                                                           
    expecting letter                                                                                         
    

    A full code example with some slightly more complicated labelling:

    {-# LANGUAGE GHC2021 #-}
    
    module Main where
    
    import Text.Megaparsec hiding (label, (<?>))
    import qualified Text.Megaparsec as P
    import Text.Megaparsec.Internal (ParsecT(..))
    import Text.Megaparsec.Char
    import qualified Data.Set as Set
    
    data ErrorWithLabel = ErrorWithLabel String (ParseError String ErrorWithLabel)
      deriving (Eq, Ord)
    
    deriving instance Ord (ParseError String ErrorWithLabel)
    
    instance ShowErrorComponent ErrorWithLabel where
      showErrorComponent (ErrorWithLabel l e) =
        "while parsing " <> l <> ",\n" <> parseErrorTextPretty e
    
    type Parser = Parsec ErrorWithLabel String
    
    label :: String -> Parser p -> Parser p
    label l p = ParsecT $ \s cok cerr eeok eerr ->
      let addLabel e = FancyError (errorOffset e) .
            Set.singleton . ErrorCustom $ ErrorWithLabel l e
      in unParser (P.label l p) s cok (cerr . addLabel) eeok eerr
    
    infix 0 <?>
    (<?>) :: Parser p -> String -> Parser p
    (<?>) = flip label
    
    repspec :: Parser String
    repspec = (do
      d <- digitChar
      c <- letterChar <?> "a character to replicate"
      return $ replicate (read [d]) c)
      <?> "replication spec"
    
    literal :: Parser String
    literal = between (char '\'') (char '\'') (takeWhileP Nothing (/= '\'')) <?> "literal string"
    
    comment :: Parser String
    comment = "" <$ char ';' <* takeRest <?> "a comment"
    
    expr :: Parser String
    expr = (repspec <|> literal <?> "expression")
      <|>  (comment <?> "comment")
    
    main :: IO ()
    main = mapM_ (parseTest (expr <* eof))
      [ "3a"   -- parses okay
    
      , "33a"  -- while parsing expression,
      --  ^    -- while parsing repspec,
               -- unexpected '3'
               -- expected a character to replicate
    
      , "?"    -- unexpected '?'
      -- ^     -- expecting comment or expression
    
      , "'x"   -- while parsing expression,
      --   ^   -- while parsing literal string,
               -- unexpected end of input
               -- expecting '''
      ]