pythoncryptographybinarybitarray

Efficient bitarray operations in Python


I'm implementing a class for handling arrays of bit values in Python. So far, here is what I did:

class Bitarray:
    """ Representation of an array of bits.
    :param bits: the list of boolean values (i.e. {False, True}) of the bitarray.
    """
    def __init__(self, values:list[bool]):
        self._bits:list[bool] = values
        self._length:int = len(values)

    @staticmethod
    def from_bytes(data:bytes, byteorder:str = None):
        def _access_bit(data, index):
            """ Credits: https://stackoverflow.com/a/43787831/23022499
            """
            base = int(index // 8)
            shift = int(index % 8)
            return (data[base] >> shift) & 0x1
        
        if byteorder == None:
            byteorder = sys.byteorder
        elif byteorder != 'little' and byteorder != 'big':
            raise ValueError('Param byteorder must be either "little" or "big".')
        
        bin_data = [_access_bit(data, i) for i in range(len(data) * 8)]
        bin_data = [bool(b) for b in bin_data]
        return Bitarray(bin_data) if byteorder == 'big' else Bitarray(bin_data[::-1])
    
    def __getitem__(self, index) -> bool:
        return self._bits[index]
    
    def __len__(self) -> int:
        return self._length
    
    # bit-wise operations
    def __and__(self, other):
        if type(other) != Bitarray:
            raise TypeError("Unsupported operand type(s) for &: '{}' and '{}'".format(type(self), type(other)))
        
        if self._length != len(other):
            raise IndexError("The arguments for bitwise operations must have same length.")
        
        return Bitarray([(a & b) for a, b in zip(self._bits, other._bits)])

    def __or__(self, other):
        if type(other) != Bitarray:
            raise TypeError("Unsupported operand type(s) for &: '{}' and '{}'".format(type(self), type(other)))
        
        if self._length != len(other):
            raise IndexError("The arguments for bitwise operations must have same length.")
        
        return Bitarray([(a | b) for a, b in zip(self._bits, other._bits)])

    def __xor__(self, other):
        if type(other) != Bitarray:
            raise TypeError("Unsupported operand type(s) for &: '{}' and '{}'".format(type(self).__name__, type(other).__name__))
        
        if self._length != len(other):
            raise IndexError("The arguments for bitwise operations must have same length.")
        
        return Bitarray([(a ^ b) for a, b in zip(self._bits, other._bits)])

    # to string
    def __str__(self):
        return ''.join(str(int(b)) for b in self._bits)

If you are wondering about the usage, I want to generate random values using os.urandom() and then perform bitwise operations with these values. An example:

import os
import sys

a = Bitarray.from_bytes(os.urandom(16 // 8), sys.byteorder)
b = Bitarray.from_bytes(os.urandom(16 // 8), sys.byteorder)

print('XOR result: {}'.format(a ^ b))

By far, what I've done works. But I am pretty sure this is so inefficient that for those who are reading this and know way more about Python, I just committed some horrible sin. :P Jokes aside, iterating over boolean values for bitwise operations can't be good, right? Is there some more efficient way to do this?

P.S. For those who are curious, I am trying to build a cryptographic protocol which uses random keys and xor operations. I know about some modules like cryptography, bitarray and others but I thought it would be funnier to try to implement something on my own. Sorry for the lack of documentation and comments, I'll try to improve!

EDIT: Of course one may ask why do I need to use bit arrays if I could just perform bitwise operations using bytes. I need to access the single bit values and I would like my Bitarray class to be able to perform bitwise operations without having to move back to bytes everytime.


Solution

  • I suggest using Python's infinite precision integers to hold the values since all the binary operations can be performed directly on them. Quick edit of your code:

    import os
    import sys
    
    class Bitarray:
        def __init__(self, bits, value=0):
            if value >= (1 << bits):
                raise ValueError(f'value does not fit into {bits} bit(s)')
            self._bits: int = value
            self._length: int = bits
    
        @classmethod
        def from_bytes(cls, data: bytes, byteorder: str = None):
            if byteorder is None:
                byteorder = sys.byteorder
            elif byteorder != 'little' and byteorder != 'big':
                raise ValueError('Param byteorder must be either "little" or "big".')
            value = int.from_bytes(data, byteorder)
            return cls(len(data) * 8, value)
    
        def __getitem__(self, index) -> bool:
            return bool(self._bits & (1 << index))
    
        def __len__(self) -> int:
            return self._length
    
        # bit-wise operations
        def __and__(self, other):
            if not isinstance(other, Bitarray):
                raise TypeError("Unsupported operand type(s) for &: '{}' and '{}'".format(type(self), type(other)))
            if self._length != len(other):
                raise IndexError("The arguments for bitwise operations must have same length.")
            return Bitarray(self._length, self._bits & other._bits)
    
        def __or__(self, other):
            if not isinstance(other, Bitarray):
                raise TypeError("Unsupported operand type(s) for &: '{}' and '{}'".format(type(self), type(other)))
            if self._length != len(other):
                raise IndexError("The arguments for bitwise operations must have same length.")
            return Bitarray(self._length, self._bits | other._bits)
    
        def __xor__(self, other):
            if not isinstance(other, Bitarray):
                raise TypeError("Unsupported operand type(s) for &: '{}' and '{}'".format(type(self).__name__, type(other).__name__))
            if self._length != len(other):
                raise IndexError("The arguments for bitwise operations must have same length.")
            return Bitarray(self._length, self._bits ^ other._bits)
    
        # to string
        def __str__(self):
            return format(self._bits, f'0{self._length}b')
    
        def __repr__(self):
            length = (self._length + 3) // 4
            return f'Bitarray(bits={self._length}, value=0x{self._bits:0{length}X})'
    
    a = Bitarray(8, 0xAF)
    b = Bitarray(8, 0x55)
    print(repr(a), repr(b))
    print(a & b)
    print(a | b)
    print(a ^ b)
    c = Bitarray.from_bytes(os.urandom(32))
    print(repr(c))
    d = Bitarray.from_bytes(b'\xff\x00')
    print(repr(d))
    e = Bitarray.from_bytes(b'\xff\x00', 'big')
    print(repr(e))
    

    Output:

    Bitarray(bits=8, value=0xAF) Bitarray(bits=8, value=0x55)
    00000101
    11111111
    11111010
    Bitarray(bits=256, value=0x976E921A9C93C7D5D9C4BB6722B38064A04EA882CA4DF76981F52C7097E0DDFF)
    Bitarray(bits=16, value=0x00FF)
    Bitarray(bits=16, value=0xFF00)