delphihttpindymultipartform-data

Indy MIME decoding of Multipart/Form-Data Requests returns trailing CR/LF


Indy 10.6 revision 5128 seems to contain a change which breaks the code below for HTTP form uploads.

The received data contains two additional bytes at the end, a CR/LF pair.

Reading through the changed code lines between 5127 and 5128 did not bring me to the root cause.

I will try to debug it when I find the time and post the result here (but maybe somebody is faster).

Here is a stand-alone demo application which shows a HTML upload form at http://127.0.0.1:8080

program IndyMultipartUploadDemo;

{$APPTYPE CONSOLE}

uses
  IdHTTPServer, IdCustomHTTPServer, IdContext, IdSocketHandle, IdGlobal,
  IdMessageCoder, IdGlobalProtocols, IdMessageCoderMIME, IdMultiPartFormData,
  SysUtils, Classes;

type
  TMimeHandler = procedure(var VDecoder: TIdMessageDecoder;
    var VMsgEnd: Boolean; const Response: TIdHTTPResponseInfo) of object;

  TMyServer = class(TIdHTTPServer)
  private
    procedure ProcessMimePart(var VDecoder: TIdMessageDecoder;
      var VMsgEnd: Boolean; const Response: TIdHTTPResponseInfo);
    function IsHeaderMediaType(const AHeaderLine, AMediaType: String): Boolean;
    function MediaTypeMatches(const AValue, AMediaType: String): Boolean;
    function GetUploadFolder: string;
    procedure HandleMultipartUpload(Request: TIdHTTPRequestInfo; Response:
     TIdHTTPResponseInfo; MimeHandler: TMimeHandler);
  public
    procedure InitComponent; override;
    procedure DoCommandGet(AContext: TIdContext;
      ARequestInfo: TIdHTTPRequestInfo; AResponseInfo: TIdHTTPResponseInfo); override;
  end;

procedure Demo;
var
  Server: TMyServer;
begin
  ReportMemoryLeaksOnShutdown := True;

  Server := TMyServer.Create;
  try
    try
      Server.Active := True;
    except
      on E: Exception do
      begin
        WriteLn(E.ClassName + ' ' + E.Message);
      end;
    end;
    WriteLn('Hit any key to terminate.');
    ReadLn;
  finally
    Server.Free;
  end;
end;

procedure TMyServer.InitComponent;
var
  Binding: TIdSocketHandle;
begin
  inherited;

  Bindings.Clear;
  Binding := Bindings.Add;
  Binding.IP := '127.0.0.1';
  Binding.Port := 8080;

  KeepAlive := True;
end;

procedure TMyServer.DoCommandGet(AContext: TIdContext;
  ARequestInfo: TIdHTTPRequestInfo; AResponseInfo: TIdHTTPResponseInfo);
begin
  AResponseInfo.ContentType := 'text/html';
  AResponseInfo.CharSet := 'UTF-8';

  if ARequestInfo.CommandType = hcGET then
  begin
    AResponseInfo.ContentText :=
        '<!DOCTYPE HTML>' + #13#10
      + '<html>' + #13#10
      + ' <head>' + #13#10
      + ' <title>Multipart Upload Example</title>' + #13#10
      + ' </head>' + #13#10
      + ' <body> ' + #13#10
      + ' <form enctype="multipart/form-data" method="post">' + #13#10
      + ' <fieldset>' + #13#10
      + ' <legend>Standard file upload</legend>' + #13#10
      + ' <label>File input</label>' + #13#10
      + ' <input type="file" class="input-file" name="upload" />' + #13#10
      + ' <button type="submit" class="btn btn-default">Upload</button>' + #13#10
      + ' </fieldset>' + #13#10
      + ' </form>' + #13#10
      + ' </body>' + #13#10
      + '</html>' + #13#10;
  end
  else
  begin
    if ARequestInfo.CommandType = hcPOST then
    begin
      if IsHeaderMediaType(ARequestInfo.ContentType, 'multipart/form-data') then
      begin
        HandleMultipartUpload(ARequestInfo, AResponseInfo, ProcessMimePart);
      end;
    end;
  end;
end;

// based on code on the Indy and Winsock Forum articles
// http://forums2.atozed.com/viewtopic.php?f=7&t=10924
// http://embarcadero.newsgroups.archived.at/public.delphi.internet.winsock/201107/1107276163.html

procedure TMyServer.ProcessMimePart(var VDecoder: TIdMessageDecoder;
  var VMsgEnd: Boolean; const Response: TIdHTTPResponseInfo);
var
  LMStream: TMemoryStream;
  LNewDecoder: TIdMessageDecoder;
  UploadFile: string;
begin
  LMStream := TMemoryStream.Create;
  try
    LNewDecoder := VDecoder.ReadBody(LMStream, VMsgEnd);
    if VDecoder.Filename <> '' then
    begin
      try
        LMStream.Position := 0;
        Response.ContentText := Response.ContentText
          + Format('<p>%s %d bytes</p>' + #13#10,
            [VDecoder.Filename, LMStream.Size]);

        // write stream to upload folder
        UploadFile := GetUploadFolder + VDecoder.Filename;
        LMStream.SaveToFile(UploadFile);
        Response.ContentText := Response.ContentText
          + '<p>' + UploadFile + ' written</p>';

      except
        LNewDecoder.Free;
        raise;
      end;
    end;
    VDecoder.Free;
    VDecoder := LNewDecoder;
  finally
    LMStream.Free;
  end;
end;

function TMyServer.IsHeaderMediaType(const AHeaderLine, AMediaType: String): Boolean;
begin
  Result := MediaTypeMatches(ExtractHeaderItem(AHeaderLine), AMediaType);
end;

function TMyServer.MediaTypeMatches(const AValue, AMediaType: String): Boolean;
begin
  if Pos('/', AMediaType) > 0 then begin
    Result := TextIsSame(AValue, AMediaType);
  end else begin
    Result := TextStartsWith(AValue, AMediaType + '/');
  end;
end;

function TMyServer.GetUploadFolder: string;
begin
  Result := ExtractFilePath(ParamStr(0)) + 'upload\';
  ForceDirectories(Result);
end;

procedure TMyServer.HandleMultipartUpload(Request: TIdHTTPRequestInfo;
  Response: TIdHTTPResponseInfo; MimeHandler: TMimeHandler);
var
  LBoundary, LBoundaryStart, LBoundaryEnd: string;
  LDecoder: TIdMessageDecoder;
  LLine: string;
  LBoundaryFound, LIsStartBoundary, LMsgEnd: Boolean;
begin
  LBoundary := ExtractHeaderSubItem(Request.ContentType, 'boundary',
    QuoteHTTP);
  if LBoundary = '' then
  begin
    Response.ResponseNo := 400;
    Response.CloseConnection := True;
    Response.WriteHeader;
    Exit;
  end;

  LBoundaryStart := '--' + LBoundary;
  LBoundaryEnd := LBoundaryStart + '--';

  LDecoder := TIdMessageDecoderMIME.Create(nil);
  try
    TIdMessageDecoderMIME(LDecoder).MIMEBoundary := LBoundary;
    LDecoder.SourceStream := Request.PostStream;
    LDecoder.FreeSourceStream := False;

    LBoundaryFound := False;
    LIsStartBoundary := False;
    repeat
      LLine := ReadLnFromStream(Request.PostStream, -1, True);
      if LLine = LBoundaryStart then
      begin
        LBoundaryFound := True;
        LIsStartBoundary := True;
      end
      else if LLine = LBoundaryEnd then
      begin
        LBoundaryFound := True;
      end;
    until LBoundaryFound;

    if (not LBoundaryFound) or (not LIsStartBoundary) then
    begin
      Response.ResponseNo := 400;
      Response.CloseConnection := True;
      Response.WriteHeader;
      Exit;
    end;

    LMsgEnd := False;
    repeat
      TIdMessageDecoderMIME(LDecoder).MIMEBoundary := LBoundary;
      LDecoder.SourceStream := Request.PostStream;
      LDecoder.FreeSourceStream := False;

      LDecoder.ReadHeader;
      case LDecoder.PartType of
        mcptText, mcptAttachment:
          begin
            MimeHandler(LDecoder, LMsgEnd, Response);
          end;
        mcptIgnore:
          begin
            LDecoder.Free;
            LDecoder := TIdMessageDecoderMIME.Create(nil);
          end;
        mcptEOF:
          begin
            LDecoder.Free;
            LMsgEnd := True;
          end;
      end;
    until (LDecoder = nil) or LMsgEnd;
  finally
    LDecoder.Free;
  end;
end;

begin
  Demo;
end.

Solution

  • The current SVN revision is 5203, so you are a little behind in updates.

    I tested your code as-is using revision 5203 in XE2 with IE11.

    I uploaded a test .pas file, and it is 53 bytes larger in the upload folder. I can confirm that the raw PostStream data prior to decoding is correct.

    Yes, I do see an extra CRLF at the end of the file, and that is related to the way TIdMessageDecoderMIME decodes non-binary non-base64/QP-encoded data (which your example does not). It reads the data line-by-line, decoding each line as it goes, writing decoded lines to the destination stream with a new line break when not using a binary transfer encoding. That logic does not take into account that a line break in front of a MIME boundary belongs to the boundary, not to the data preceding the boundary. The MIME spec is quite clear on that, but Indy does not yet take that into account for non-base64 data.

    The rest of the differences in file size are all related to non-ASCII characters being converted to sequences of $3F bytes, including the UTF-8 BOM. This is because the PostStream data is being decoded as 7bit ASCII in TIdMessageDecoderMIME.ReadBody() because there is no Content-Transfer-Encoding header being sent with the file data, so Indy is defaulting to ASCII due to this statement in RFC 2045 Section 6.1:

    "Content-Transfer-Encoding: 7BIT" is assumed if the Content-Transfer-Encoding header field is not present.

    However, Section 6.4 states the following, which seems to contradict 6.1:

    Any entity with an unrecognized Content-Transfer-Encoding must be treated as if it has a Content-Type of "application/octet-stream", regardless of what the Content-Type header field actually says.

    ReadBody() handles both cases, however 6.1 is checked first, so Indy assumes a 7bit encoding, which then nullifies its handling of 6.4 since 7bit is not an unrecognized encoding. Unless one is to assume that a missing Content-Transfer-Encoding should be treated as an unrecognized encoding, which Indy currently does not.

    The actual Content-Type of the upload is application/octet-stream, which implies an 8bit encoding. When I update ReadBody() to treat application/octet-stream as 8bit instead of 7bit when applying Section 6.1, all of the problems go away:

    if LContentTransferEncoding = '' then begin
      // RLebeau 04/08/2014: According to RFC 2045 Section 6.1:
      // "Content-Transfer-Encoding: 7BIT" is assumed if the
      // Content-Transfer-Encoding header field is not present."
      if IsHeaderMediaType(LContentType, 'application/mac-binhex40') then begin  {Do not Localize}
        LContentTransferEncoding := 'binhex40'; {do not localize}
      end
    
      // START FIX!!
      else if IsHeaderMediaType(LContentType, 'application/octet-stream') then begin  {Do not Localize}
        LContentTransferEncoding := '8bit'; {do not localize}
      end
      // END FIX!!
    
      else begin
        LContentTransferEncoding := '7bit'; {do not localize}
      end;
    end
    

    The uploaded file is the correct file size, bytes are decoded and written correctly without non-ASCII sequences being converted to $3F sequences, and there is no extra CRLF at the end of the file.

    I will have to investigate the issue further to see if there is a better way to handle this descrepency. I have opened tickets in Indy's issue trackers for it. In the meantime, you have a workaround if you patch your copy of Indy.