fparsec rfc2822

fparsec rfc2822 parsing multiple header lines


I'm trying to parse RFC 2822 messages using FParsec, but I'm unable to handle headers that span multiple lines (the parser gets confused by the next header). Here's my best attempt; do you have any tips?

/// Shorthand for `pstring`: a parser for the literal string `s`.
let str (s:string) = s |> pstring

/// Parses a (possibly empty) run of characters, stopping before the
/// first ':', '\r' or '\n'.
let stringLiteral =
    // A character belongs to the run unless it is one of the three delimiters.
    let isPlainChar c =
        match c with
        | ':' | '\r' | '\n' -> false
        | _ -> true
    manySatisfy isPlainChar

/// Zero or more space characters (only ' '; tabs are not consumed here).
let ws = pchar ' ' |> many

/// Parses a single-line `name: value` header terminated by '\n' and
/// yields the (name, value) pair.
let keyValueSimple =
    // Optional spaces, the colon, optional spaces, then the value text.
    let value = ws >>. str ":" >>. ws >>. stringLiteral
    stringLiteral .>>. value .>> pchar '\n'

/// A value line that is continued: leading spaces, the line's text, then
/// '\n' immediately followed by '\t'. Yields only the text.
let lineValue = ws >>. stringLiteral .>> pchar '\n' .>> pchar '\t'

/// The final value line of a header: leading spaces, the line's text, then
/// a '\n' that is NOT followed by '\t'. Yields only the text.
let lastValue = ws >>. stringLiteral .>> pchar '\n' .>> notFollowedBy (pchar '\t')

// Non-working attempt from the question: `many lineValue` is applied without
// `attempt`, and the post reports it failing with "expecting \t" at the start
// of the next header. The final mapping also returns (f, f), so the parsed
// value lines `d` and `b` are discarded.
let keyValueComplex =  stringLiteral .>>. (ws >>. pchar ':') .>>. (many lineValue) .>>. lastValue |>> ( fun (((f),d),b) -> (f,f) )                     

/// One or more consecutive headers, each parsed by `keyValueComplex`.
let headers = keyValueComplex |> many1

/// Runs the `headers` parser over a hard-coded two-header sample using `test`.
/// Note: `fileName` is accepted but not used by the body.
let parse (fileName:string) =
    let sample = "Return-Path: <ewrwe@werw.com>\n\twerwe\nDelivered-To: adfasdf@aasdfas.afa.com\n "
    test headers sample

I get the error "expecting \t" at line 3, column 1: Delivered-To: adfasdf@aasdfas.afa.com


Solution

  • Never mind. It looks like I needed to backtrack (using attempt) so that the parser does not always expect a \t, but instead goes on to look for the next header.

    // Header with optional tab-continued lines. `attempt` is applied to each
    // continued-line parse inside `many`, so the last line is left for `lastValue`.
    // Yields (name, all value lines concatenated).
    let keyValueComplex =
        let name = stringLiteral .>> (ws >>. pchar ':')
        let continued = many (attempt lineValue)
        name .>>. continued .>>. lastValue
        |>> (fun ((key, parts), final) -> (key, String.concat "" parts + final))

    This now yields:

    Success: [("Return-Path", "<ewrwe@werw.com>werwe"); ("Delivered-To", "adfasdf@aasdfas.afa.com")]