utf-8 character-encoding byte-order-mark delphi-11-alexandria

Remove UTF-8 BOM from TStream output


I'm using Delphi 11. I need to write a UTF-8 .csv file without a BOM using a TStreamWriter object, but using TEncoding.UTF8 produces a UTF-8 file with a BOM, so I tried obtaining the encoding directly with TEncoding.GetEncoding(65001), without success:

// Creates, appends to, and/or closes a CSV text writer, depending on the flags.
//
// Parameters:
//   pathname - file path passed to TStreamWriter.Create when `create` is True.
//   outStr   - line written with Writer.WriteLine when `append` is True.
//   create   - when True, a new TStreamWriter is constructed and assigned to Writer.
//   append   - when True, outStr is written and the writer is flushed.
//   close    - when True, Writer is closed and freed.
//
// Returns: Result starts as False and is set to True only after a line has
// been written and flushed in the `append` branch.
//
// Exceptions raised inside the try block are caught here, shown with
// ShowMessage and added to lbConsole; they are not re-raised.
function TfMain.generateCsvFile(pathname : String ; outStr : String; create : boolean; append:boolean; close:boolean) : Boolean;
var
  // The local declaration below is commented out, so `Writer` resolves to
  // something declared outside this function (presumably a field of TfMain
  // that persists between calls - TODO confirm).
  //Writer: TStreamWriter;
  UTF8withoutBOM: TEncoding;
begin
  Result := False;
  // A fresh encoding object is requested on every call and is never freed
  // in this function. NOTE(review): per the surrounding question, output
  // written with this encoding still starts with a BOM despite the name.
  UTF8withoutBOM := TEncoding.GetEncoding(65001);
  try
    if create then begin
      // Second argument False = do not append to an existing file.
      Writer := TStreamWriter.Create(pathname, False, UTF8WithoutBOM);
    end;
    if append then begin
      Writer.WriteLine(outStr);
      Writer.Flush;
      Result := True;
    end;
    if close then begin
      Writer.Close;
      // Writer is not reset to nil after Free, so the reference is stale
      // until the next call with create = True.
      Writer.Free;
    end;
  except
    on e : Exception do begin
      ShowMessage('Errore '+e.Message);
      lbConsole.Items.Add('Errore '+e.Message);
    end;
  end;
end;

Is there a way to tell Delphi to remove the BOM using TStreamWriter?


Solution

  • You can derive a new class from SysUtils.TUTF8Encoding and override its virtual GetPreamble() method to return an empty byte array. Then use that class instead of TEncoding.UTF8 or TEncoding.GetEncoding(65001).

    type
      // UTF-8 encoding that reports an empty preamble (no byte-order mark).
      // Everything else is inherited unchanged from TUTF8Encoding.
      TUTF8EncodingNoBOM = class(TUTF8Encoding)
      public
        function GetPreamble: TBytes; override;
      end;
    
    // Returns a zero-length byte array as the preamble.
    function TUTF8EncodingNoBOM.GetPreamble: TBytes;
    begin
      SetLength(Result, 0);
    end;
    
    // Skeleton of generateCsvFile reworked to use TUTF8EncodingNoBOM.
    // The "..." lines stand for declarations and statements elided here.
    function TfMain.generateCsvFile(pathname : String ; outStr : String; create : boolean; append: boolean; close: boolean) : Boolean;
    var
      ... 
      UTF8withoutBOM: TEncoding;
    begin
      Result := False;
      // This call creates its own encoding instance; it is released in the
      // finally section below.
      UTF8withoutBOM := TUTF8EncodingNoBOM.Create;
      try
        ...
      finally    
        // NOTE(review): if the elided body keeps a writer alive after this
        // call returns, that writer may still reference this encoding once
        // it has been freed - confirm the writer does not outlive the call.
        UTF8withoutBOM.Free;
      end;
    end; 
    

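    On a side note: you need to Free() the TEncoding object that is returned by TEncoding.GetEncoding(), otherwise it will be leaked. (This is unlike the shared instance returned by TEncoding.UTF8, which is owned by the RTL and must not be freed.)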