binaryadaendianness

How do I read a binary file in BigEndian order to a record?


I have a binary file format that is written in BigEndian order. The files are of varying size so I can't use Sequential_IO for this, as I need to read different types.

The problem is, when using Stream_IO, I can't find a way to use BigEndian, and Scalar_Storage_Order also doesn't affect anything. Also, I'm a very fresh beginner at Ada and overall tips and suggestions to the code are very welcome.

share.adb:
with Ada.Text_IO;
with Ada.Streams.Stream_IO; use Ada.Streams.Stream_IO;
with Ada.Streams;           use Ada.Streams;

package body Share is

   procedure Read_Share (Segment_Size : Positive; Required_Shares : Positive)
   is
      --  Ceiling function
      Block_Size : constant Positive :=
        (Segment_Size + (Required_Shares - 1)) / Required_Shares;
      type Block is array (Integer range 0 .. Block_Size) of Byte;

      S               : Stream_Access;
      Share_File      : File_Type;
      My_Share_Header : Share_Header;
   begin
      Open (Share_File, In_File, "../go-tahoe/3");
      S := Stream (Share_File);
      Share_Header'Read (S, My_Share_Header);
      My_Share_Header.Block_Size := Unsigned_32 (Block_Size);
      Display_Share_Content (My_Share_Header);

      Close (Share_File);
      --  Read_Blocks (My_Share_Header, Share_File);

      --  Now My_Share contains the values read from the binary file
   end Read_Share;

   procedure Display_Share_Content (My_Share_Header : Share_Header) is
   begin
      Ada.Text_IO.Put_Line
        ("Share version: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Version));
      Ada.Text_IO.Put_Line
        ("Share Data Length: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Data_Length));
      Ada.Text_IO.Put_Line
        ("Lease Number: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Lease_number));
      Ada.Text_IO.Put_Line
        ("Share version: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Version));
      Ada.Text_IO.Put_Line
        ("Block Size: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Block_Size));
      Ada.Text_IO.Put_Line
        ("Data Size: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Data_Size));
      Ada.Text_IO.Put_Line
        ("Data offset: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Data_Offset));
      Ada.Text_IO.Put_Line
        ("Plaintext hash tree offset: " &
         Interfaces.Unsigned_32'Image
           (My_Share_Header.Plaintext_Hash_Tree_Offset));
      Ada.Text_IO.Put_Line
        ("Crypttext hash tree offset: " &
         Interfaces.Unsigned_32'Image
           (My_Share_Header.Crypttext_Hash_Tree_Offset));
      Ada.Text_IO.Put_Line
        ("Block hashes offset: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Block_Hashes_Offset));
      Ada.Text_IO.Put_Line
        ("Share hashes offset: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.Share_Hashes_Offset));
      Ada.Text_IO.Put_Line
        ("URI Extension Length and URI Extension block offset: " &
         Interfaces.Unsigned_32'Image (My_Share_Header.URI_Extension_Offset));
   end Display_Share_Content;

   procedure Read_Blocks
     (My_Share_Header : Share_Header; Share_File : File_Type)
   is
      Total_Blocks : Interfaces.Unsigned_32 := My_Share_Header.Data_Size;
   begin
      Ada.Text_IO.Put ("");

   end Read_Blocks;
end Share;

share.ads:
with Interfaces; use Interfaces;
with System;     use System;
with Ada.Streams.Stream_IO;

package Share is

   type Byte is new Interfaces.Unsigned_8;
   type Kilobyte is array (Integer range 0 .. 1_023) of Byte;
   type Kilobyte_array is array (Integer range <>) of Kilobyte;

   type Share_Header is record
      Version                    : Unsigned_32;
      Data_Length                : Unsigned_32;
      Lease_number               : Unsigned_32;
      Version_Junk               : Unsigned_32;
      --  unused as it can be calculated from the URI
      Block_Size                 : Unsigned_32;
      Data_Size                  : Unsigned_32;
      Data_Offset                : Unsigned_32;
      Plaintext_Hash_Tree_Offset : Unsigned_32;
      Crypttext_Hash_Tree_Offset : Unsigned_32;
      Block_Hashes_Offset        : Unsigned_32;
      Share_Hashes_Offset        : Unsigned_32;
      URI_Extension_Offset       : Unsigned_32;
   end record;

   for Share_Header use record
      Version at 0 range 0 .. 32;
      --  Data_Length                : Unsigned_32;
      --  Lease_number               : Unsigned_32;
      --  Version_Junk               : Unsigned_32;
      --  --  unused as it can be calculated from the URI
      --  Block_Size                 : Unsigned_32;
      --  Data_Size                  : Unsigned_32;
      --  Data_Offset                : Unsigned_32;
      --  Plaintext_Hash_Tree_Offset : Unsigned_32;
      --  Crypttext_Hash_Tree_Offset : Unsigned_32;
      --  Block_Hashes_Offset        : Unsigned_32;
      --  Share_Hashes_Offset        : Unsigned_32;
      --  URI_Extension_Offset       : Unsigned_32;
   end record;

   for Share_Header'Bit_Order use High_Order_First;
   for Share_Header'Scalar_Storage_Order use High_Order_First;

   procedure Read_Share (Segment_Size : Positive; Required_Shares : Positive);
   procedure Display_Share_Content (My_Share_Header : Share_Header);
   procedure Read_Blocks
     (My_Share_Header : Share_Header;
      Share_File      : Ada.Streams.Stream_IO.File_Type);
end Share;

Tried defining the component clauses to not much success, different bit orders, modifying the Stream_IO storage arrays.


Solution

  • Apologies for a very late answer, but to complete the answer set: IMO the default method here is to derive a new type from Unsigned_32 and write your own stream 'Read attribute for this type, to use whatever encoding you need, in this case a big-endian encoding.

    So, in share.ads, I would add:

    type Word is new Interfaces.Unsigned_32;
    -- An Unsigned_32 with big-endian encoding in input streams.
    
    procedure Read_Word (
       Stream : access Ada.Streams.Root_Stream_Type'Class;
       Item   : out Word'Base);
    
    for Word'Read use Read_Word;
    

    I would then change the type of all relevant Share_Header components from Unsigned_32 to Word, and in share.adb add:

    procedure Read_Word (
      Stream : access Ada.Streams.Root_Stream_Type'Class;
      Item   : out Word'Base)
    is
       B1, B2, B3, B4 : Byte;
    begin
       Byte'Read (Stream, B1);
       Byte'Read (Stream, B2);
       Byte'Read (Stream, B3);
       Byte'Read (Stream, B4);
       
       Item :=
             Shift_Left (Word (B1), 24)
          or Shift_Left (Word (B2), 16)
          or Shift_Left (Word (B3),  8)
          or             Word (B4);
       
    end Read_Word;
    

    As you see, the Read_Word procedure says exactly in which order the bytes are read and how they are composed into a Word -- you have full control.

    The compiler will now automatically use Read_Byte in Share_Header'Read.

    I use this method extensively to read various kinds of binary files such as ELF and other executable program formats. This solution is entirely standard Ada and does not rely on any GNAT-specific stuff.