swift pointers memory-management mtlbuffer

How do I specify a deallocator for memory allocated with posix_memalign in Swift?


I am creating page-aligned memory with posix_memalign, then I create an MTLBuffer from that memory with no copy. The GPU then blits data into that MTLBuffer. When that completes, I wrap the same memory in Data with Data.init(bytesNoCopy:count:deallocator:) to pass on in my project. I don't know what to use as the deallocator. I am translating this code from an Apple tutorial written in Obj-C. The Apple code is here. I spent two days researching this myself, trying to understand it.

The Apple OBJ-C code deallocator looks like this. This is beyond my OBJ-C knowledge.

// Block to dealloc memory created with vm_allocate: it receives the base
// address and byte length of the region and passes that range to
// vm_deallocate for the current task (mach_task_self()).
   void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
                ^(void *bytes, NSUInteger length)
                {
                    vm_deallocate((vm_map_t)mach_task_self(),
                                  (vm_address_t)bytes,
                                  length);
                };

The code in question is towards the end of my listing.

  /// Blits all positions and velocities into page-aligned transfer memory and provides
  /// them to the client, either to show final results or to continue the simulation on
  /// another device.
  ///
  /// - Parameters:
  ///   - dataProvider: Called with the position data, the velocity data and `time` after
  ///     the blit has completed.
  ///   - time: Simulation time passed through to `dataProvider` unchanged.
  func provideFullData(
    _ dataProvider: AAPLFullDatasetProvider,
    forSimulationTime time: CFAbsoluteTime
  ) {
    let positionDataSize = positions[oldBufferIndex]!.length
    let velocityDataSize = velocities[oldBufferIndex]!.length
    var positionDataAddress: UnsafeMutableRawPointer? = nil
    var velocityDataAddress: UnsafeMutableRawPointer? = nil

    // Allocate memory on page-aligned addresses so it can be used by both GPU and CPU.
    let alignment = 0x4000

    // Round each length up to a multiple of the alignment.
    let positionAllocationSize = (positionDataSize + alignment - 1) & ~(alignment - 1)
    let velocityAllocationSize = (velocityDataSize + alignment - 1) & ~(alignment - 1)

    // Each allocation writes into its own out-parameter, and posix_memalign's return
    // code (0 on success) is checked before either pointer is used.
    let positionStatus = posix_memalign(&positionDataAddress, alignment, positionAllocationSize)
    let velocityStatus = posix_memalign(&velocityDataAddress, alignment, velocityAllocationSize)

    guard positionStatus == 0, velocityStatus == 0,
          let positionBytes = positionDataAddress,
          let velocityBytes = velocityDataAddress
    else {
      fatalError("failed to allocate page-aligned transfer memory")
    }

    // Blit positions and velocities to the transfer memory.
    do {
      // Wrap the allocations in no-copy Metal buffers. The unwrapped pointers are passed
      // directly: `&positionDataAddress` would hand Metal the address of the local
      // variable rather than the allocated memory. Each buffer spans the whole
      // page-rounded allocation; only the source buffer's length is copied into it.
      // NOTE(review): Metal's documentation names vm_allocate/mmap as the typical source
      // for no-copy buffers - confirm posix_memalign memory is accepted on the target OS.
      let positionBuffer = device.makeBuffer(
        bytesNoCopy: positionBytes,
        length: positionAllocationSize,
        options: .storageModeShared,
        deallocator: nil)

      positionBuffer?.label = "Final Positions Buffer"

      let velocityBuffer = device.makeBuffer(
        bytesNoCopy: velocityBytes,
        length: velocityAllocationSize,
        options: .storageModeShared,
        deallocator: nil)

      velocityBuffer?.label = "Final Velocities Buffer"

      let commandBuffer = commandQueue?.makeCommandBuffer()
      commandBuffer?.label = "Full Transfer Command Buffer"

      let blitEncoder = commandBuffer?.makeBlitCommandEncoder()

      blitEncoder?.label = "Full Transfer Blits"

      blitEncoder?.pushDebugGroup("Full Position Data Blit")

      if let _position = positions[oldBufferIndex], let positionBuffer {
        blitEncoder?.copy(
          from: _position,
          sourceOffset: 0,
          to: positionBuffer,
          destinationOffset: 0,
          size: _position.length)
      }

      blitEncoder?.popDebugGroup()

      blitEncoder?.pushDebugGroup("Full Velocity Data Blit")

      if let _velocity = velocities[oldBufferIndex], let velocityBuffer {
        blitEncoder?.copy(
          from: _velocity,
          sourceOffset: 0,
          to: velocityBuffer,
          destinationOffset: 0,
          size: _velocity.length)
      }

      blitEncoder?.popDebugGroup()

      blitEncoder?.endEncoding()

      commandBuffer?.commit()

      // Ensure blit of data is complete before providing
      // the data to the client
      commandBuffer?.waitUntilCompleted()
    }

    // Wrap the allocated memory in Data objects without copying so the client receives
    // the blitted bytes directly.
    do {
      // This code was in Obj-C; I don't know how to convert this to Swift.
      // Block to dealloc memory created with vm_allocate
      // let deallocProvidedAddress: ((_ bytes: UnsafeMutableRawPointer?, _ length: Int) -> Void)? =
      // { bytes, length in
      // vm_deallocate(
      // mach_task_self() as? vm_map_t,
      // bytes as? vm_address_t,
      // length)
      // }
      let positionData = Data(
        bytesNoCopy: positionBytes,
        count: positionDataSize,
        deallocator: .none) // this may be a memory leak: nothing ever frees the allocation

      let velocityData = Data(
        bytesNoCopy: velocityBytes,
        count: velocityDataSize,
        deallocator: .none) // this may be a memory leak: nothing ever frees the allocation

      dataProvider(positionData, velocityData, time)
    }
  }

Here is the listing for the Apple OBJ-C code

// Set the initial positions and velocities of the simulation based upon the simulation's config.
// Writes directly into the contents of the "old" position and velocity buffers (index 0) and
// then reports the full range of both buffers as modified.
- (void)initializeData
{
    // Scale factors taken from the config; the velocity scale is relative to the cluster scale
    const float pscale = _config->clusterScale;
    const float vscale = _config->velocityScale * pscale;
    // Positions are scaled by a factor between inner and outer
    // (assumes generate_random_vector(0.0, 1.0) yields components in [0, 1] — TODO confirm)
    const float inner  = 2.5f * pscale;
    const float outer  = 4.0f * pscale;
    const float length = outer - inner;

    _oldBufferIndex = 0;
    _newBufferIndex = 1;

    vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
    vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;

    for(int i = 0; i < _config->numBodies; i++)
    {
        // NOTE(review): the random helpers are defined elsewhere; presumably nrpos is a unit
        // direction and rpos a per-component random factor — verify against their definitions
        vector_float3 nrpos    = generate_random_normalized_vector(-1.0, 1.0, 1.0);
        vector_float3 rpos     = generate_random_vector(0.0, 1.0);
        vector_float3 position = nrpos * (inner + (length * rpos));

        positions[i].xyz = position;
        positions[i].w = 1.0;

        // Default axis is +z
        vector_float3 axis = {0.0, 0.0, 1.0};

        float scalar = vector_dot(nrpos, axis);

        // When nrpos is (almost) parallel to the axis, build a different axis from nrpos's
        // swapped x/y components so the cross product below does not collapse to zero
        if((1.0f - scalar) < 1e-6)
        {
            axis.xy = nrpos.yx;

            axis = vector_normalize(axis);
        }

        // Velocity is perpendicular to both the position and the axis
        vector_float3 velocity = vector_cross(position, axis);

        // Only xyz is written; the w component of the velocity is left as it was
        velocities[i].xyz = velocity * vscale;
    }

    // Report the CPU-side writes for the full length of both buffers
    NSRange fullRange;
    fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
    [_positions[_oldBufferIndex] didModifyRange:fullRange];
    fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
    [_velocities[_oldBufferIndex] didModifyRange:fullRange];
}

/// Load simulation state that was produced elsewhere (i.e. on another device): copies the
/// supplied position and velocity bytes into the "old" buffers and adopts `simulationTime`.
- (void)setPositionData:(nonnull NSData *)positionData
           velocityData:(nonnull NSData *)velocityData
      forSimulationTime:(CFAbsoluteTime)simulationTime
{
    // Incoming data always lands in buffer 0
    _oldBufferIndex = 0;
    _newBufferIndex = 1;

    // The payloads must be exactly as large as the buffers they overwrite
    assert(_positions[_oldBufferIndex].length == positionData.length);
    assert(_velocities[_oldBufferIndex].length == velocityData.length);

    // Positions: copy the bytes in, then report the whole buffer as modified
    void *positionContents = _positions[_oldBufferIndex].contents;
    memcpy(positionContents, positionData.bytes, positionData.length);
    [_positions[_oldBufferIndex] didModifyRange:NSMakeRange(0, _positions[_oldBufferIndex].length)];

    // Velocities: same treatment
    void *velocityContents = _velocities[_oldBufferIndex].contents;
    memcpy(velocityContents, velocityData.bytes, velocityData.length);
    [_velocities[_oldBufferIndex] didModifyRange:NSMakeRange(0, _velocities[_oldBufferIndex].length)];

    _simulationTime = simulationTime;
}

/// Encode a blit that copies the leading part of `buffer` into the current update buffer so
/// the client can show a summary of the simulation's progress. The copy is as long as the
/// update buffer; nothing is committed here, the caller owns `commandBuffer`.
- (void)fillUpdateBufferWithPositionBuffer:(nonnull id<MTLBuffer>)buffer
                        usingCommandBuffer:(nonnull id<MTLCommandBuffer>)commandBuffer
{
    id<MTLBlitCommandEncoder> encoder = [commandBuffer blitCommandEncoder];
    [encoder setLabel:@"Position Update Blit Encoder"];

    [encoder pushDebugGroup:@"Position Update Blit Commands"];

    // Destination is the update buffer for the current frame
    id<MTLBuffer> destination = _updateBuffer[_currentBufferIndex];
    [encoder copyFromBuffer:buffer
               sourceOffset:0
                   toBuffer:destination
          destinationOffset:0
                       size:destination.length];

    [encoder popDebugGroup];

    [encoder endEncoding];
}

/// Blit all positions and velocities and provide them to the client either to show final results
/// or continue the simulation on another device.
///
/// Allocates two regions with vm_allocate, wraps them in no-copy MTLBuffers, blits the "old"
/// position and velocity buffers into them, waits for the GPU to finish, and then hands the
/// same memory to `dataProvider` as NSData objects that vm_deallocate it when released.
///
/// @param dataProvider Block invoked with the position data, the velocity data and `time`.
/// @param time         Simulation time passed through to `dataProvider`.
- (void)provideFullData:(nonnull AAPLFullDatasetProvider)dataProvider
      forSimulationTime:(CFAbsoluteTime)time
{
    NSUInteger positionDataSize = _positions[_oldBufferIndex].length;
    NSUInteger velocityDataSize = _velocities[_oldBufferIndex].length;
    void *positionDataAddress = NULL;
    void *velocityDataAddress = NULL;

    // Create buffers to transfer data to client
    {
        // Use vm allocate to allocate buffer on page aligned address
        // (VM_FLAGS_ANYWHERE: the kernel chooses the address and writes it to the out-parameter)
        kern_return_t err;
      
        err = vm_allocate((vm_map_t)mach_task_self(),
                          (vm_address_t*)&positionDataAddress,
                          positionDataSize,
                          VM_FLAGS_ANYWHERE);
        assert(err == KERN_SUCCESS);

        err = vm_allocate((vm_map_t)mach_task_self(),
                          (vm_address_t*)&velocityDataAddress,
                          velocityDataSize,
                          VM_FLAGS_ANYWHERE);
        assert(err == KERN_SUCCESS);
    }

    // Blit positions and velocities to a buffer for transfer
    {
        // The buffers wrap the vm_allocate'd memory without copying. deallocator:nil means
        // these MTLBuffers do not free the memory; that is left to the NSData objects below.
        id<MTLBuffer> positionBuffer = [_device newBufferWithBytesNoCopy:positionDataAddress
                                                                  length:positionDataSize
                                                                 options:MTLResourceStorageModeShared
                                                             deallocator:nil];

        positionBuffer.label = @"Final Positions Buffer";

        id<MTLBuffer> velocityBuffer = [_device newBufferWithBytesNoCopy:velocityDataAddress
                                                                  length:velocityDataSize
                                                                 options:MTLResourceStorageModeShared
                                                             deallocator:nil];

        velocityBuffer.label = @"Final Velocities Buffer";

        id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
        commandBuffer.label = @"Full Transfer Command Buffer";

        // One blit encoder carries both copies
        id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];

        blitEncoder.label = @"Full Transfer Blits";

        [blitEncoder pushDebugGroup:@"Full Position Data Blit"];

        [blitEncoder copyFromBuffer:_positions[_oldBufferIndex]
                       sourceOffset:0
                           toBuffer:positionBuffer
                  destinationOffset:0
                               size:positionBuffer.length];

        [blitEncoder popDebugGroup];

        [blitEncoder pushDebugGroup:@"Full Velocity Data Blit"];

        [blitEncoder copyFromBuffer:_velocities[_oldBufferIndex]
                       sourceOffset:0
                           toBuffer:velocityBuffer
                  destinationOffset:0
                               size:velocityBuffer.length];

        [blitEncoder popDebugGroup];

        [blitEncoder endEncoding];

        [commandBuffer commit];

        // Ensure blit of data is complete before providing the data to the client
        [commandBuffer waitUntilCompleted];
    }

    // Wrap the memory allocated with vm_allocate with a NSData object which will allow the app to
    // rely on ObjC ARC (or even MMR) to manage the memory's lifetime. Initialize NSData object
    // with a deallocation block to free the vm_allocated memory when the object has been
    // deallocated
    {
        // Block to dealloc memory created with vm_allocate; the same block serves both
        // NSData objects because it only uses the address and length it is called with
        void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
            ^(void *bytes, NSUInteger length)
            {
                vm_deallocate((vm_map_t)mach_task_self(),
                              (vm_address_t)bytes,
                              length);
            };

        NSData *positionData = [[NSData alloc] initWithBytesNoCopy:positionDataAddress
                                                            length:positionDataSize
                                                       deallocator:deallocProvidedAddress];

        NSData *velocityData = [[NSData alloc] initWithBytesNoCopy:velocityDataAddress
                                                            length:velocityDataSize
                                                       deallocator:deallocProvidedAddress];

        dataProvider(positionData, velocityData, time);
    }
}

Solution

  • You define the deallocation block (or even a named function) similar to the way it's done in Obj-C, though some casting is needed. The Obj-C deallocator block becomes the following closure in Swift:

            // Gives a vm_allocate'd region (base address plus byte count) back to the current task.
            let deallocProvidedAddress: (UnsafeMutableRawPointer, Int) -> Void = { bytes, length in
                let address = vm_offset_t(bitPattern: bytes)
                _ = vm_deallocate(mach_task_self_, address, vm_size_t(length))
            }
    

    Then, instead of .none for the deallocator parameter of Data(bytesNoCopy:count:deallocator:), you pass .custom(deallocProvidedAddress).

            // `positionDataAddress` and `velocityDataAddress` are optional pointer variables.
            // Unwrap them and pass the pointers themselves: writing `&positionDataAddress`
            // would give Data the address of the local variable, and the custom deallocator
            // would later be called on that address instead of the allocated memory.
            guard let positionBytes = positionDataAddress,
                  let velocityBytes = velocityDataAddress
            else { fatalError("transfer memory was not allocated") }

            let positionData = Data(
                bytesNoCopy: positionBytes,
                count: positionDataSize,
                deallocator: .custom(deallocProvidedAddress))
            
            let velocityData = Data(
                bytesNoCopy: velocityBytes,
                count: velocityDataSize,
                deallocator: .custom(deallocProvidedAddress))
            
            dataProvider(positionData, velocityData, time)
    

    However, since you don't call vm_allocate, but instead use posix_memalign, you'd need to call free instead of vm_deallocate in deallocProvidedAddress:

            // Releases posix_memalign'd memory with free(3); the length argument is not needed.
            let deallocProvidedAddress: (UnsafeMutableRawPointer, Int) -> Void = { bytes, _ in
                free(bytes)
            }
    

    How did I know to use free? Having never actually used posix_memalign myself, I just did man posix_memalign in Terminal, and it says, among other things:

    Memory that is allocated via posix_memalign() can be used as an argument in subsequent calls to realloc(3), reallocf(3), and free(3).

    So free is the appropriate way to deallocate memory allocated via posix_memalign

    This is my translation of the Obj-C version of provideFullData into Swift. It uses vm_allocate and vm_deallocate since that's what the Obj-C version does, but you can easily replace that with posix_memalign and free, if you like:

        /// Copies every position and velocity into vm_allocate'd memory with a GPU blit and
        /// hands the result to the client, either to show final results or to continue the
        /// simulation on another device.
        ///
        /// - Parameters:
        ///   - dataProvider: Receives the position data, the velocity data and `time`.
        ///   - time: Simulation time forwarded to `dataProvider`.
        func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
        {
            let positionDataSize = positions[oldBufferIndex]!.length
            let velocityDataSize = velocities[oldBufferIndex]!.length

            // Asks the VM system for `count` bytes; nil when the kernel reports an error.
            func vm_alloc(count: Int) -> UnsafeMutableRawPointer?
            {
                var address: vm_address_t = 0
                guard vm_allocate(mach_task_self_, &address, vm_size_t(count), VM_FLAGS_ANYWHERE) == KERN_SUCCESS
                else { return nil }
                return UnsafeMutableRawPointer(bitPattern: address)
            }

            // Wraps existing memory in a labeled, shared-storage MTLBuffer without copying it.
            func makeMTLBuffer(
                from bytes: UnsafeMutableRawPointer,
                count: Int,
                labeled label: String) -> MTLBuffer?
            {
                let buffer = device.makeBuffer(
                    bytesNoCopy: bytes,
                    length: count,
                    options: [.storageModeShared],
                    deallocator: nil)
                buffer?.label = label
                return buffer
            }

            // Custom Data deallocator for memory that came from vm_allocate.
            func deallocProvidedAddress(_ bytes: UnsafeMutableRawPointer, _ length: Int)
            {
                vm_deallocate(mach_task_self_, vm_offset_t(bitPattern: bytes), vm_size_t(length))
            }

            // Destination memory for the transfer
            guard let positionDataAddress = vm_alloc(count: positionDataSize) else {
                fatalError("failed to allocate position data")
            }
            guard let velocityDataAddress = vm_alloc(count: velocityDataSize) else {
                fatalError("failed to allocate velocity data")
            }

            // Metal views onto that memory
            guard let positionBuffer = makeMTLBuffer(
                from: positionDataAddress, count: positionDataSize, labeled: "Final Positions Buffer")
            else { fatalError("Failed to allocate positions MTLBuffer") }

            guard let velocityBuffer = makeMTLBuffer(
                from: velocityDataAddress, count: velocityDataSize, labeled: "Final Velocities Buffer")
            else { fatalError("Failed to allocate velocities MTLBuffer") }

            guard let commandBuffer = commandQueue.makeCommandBuffer() else {
                fatalError("Failed to make commandBuffer")
            }
            commandBuffer.label = "Full Transfer Command Buffer"

            guard let blitEncoder = commandBuffer.makeBlitCommandEncoder() else {
                fatalError("Failed to make blitEncoder")
            }
            blitEncoder.label = "Full Transfer Blits"

            // Encodes one whole-buffer copy inside its own debug group.
            func blit(_ source: MTLBuffer, into destination: MTLBuffer, debugGroup: String)
            {
                blitEncoder.pushDebugGroup(debugGroup)
                blitEncoder.copy(
                    from: source, sourceOffset: 0,
                    to: destination, destinationOffset: 0,
                    size: destination.length)
                blitEncoder.popDebugGroup()
            }

            blit(positions[oldBufferIndex]!, into: positionBuffer, debugGroup: "Full Position Data Blit")
            blit(velocities[oldBufferIndex]!, into: velocityBuffer, debugGroup: "Full Velocity Data Blit")

            blitEncoder.endEncoding()
            commandBuffer.commit()

            // The client must not see the memory before the GPU has finished writing it
            commandBuffer.waitUntilCompleted()

            // Hand ownership of the vm_allocate'd memory to Data; each Data instance calls
            // deallocProvidedAddress with its pointer and count when it is deallocated
            let positionData = Data(
                bytesNoCopy: positionDataAddress,
                count: positionDataSize,
                deallocator: .custom(deallocProvidedAddress))

            let velocityData = Data(
                bytesNoCopy: velocityDataAddress,
                count: velocityDataSize,
                deallocator: .custom(deallocProvidedAddress))

            dataProvider(positionData, velocityData, time)
        }
    

    I see lots of opportunities for refactoring here (I already did a little bit). If you do something other than fatalError in the "sad" path, don't forget that you need to deallocate positionDataAddress and velocityDataAddress before returning or throwing. I would at least refactor it so that each Data instance is made immediately after its successful vm_allocate/posix_memalign instead of waiting until the very end of the method; that way, in case of errors, cleanup can happen automatically. I'd also extract all the Metal blit code into its own function.

    Refactored version

    I was originally going to let the above version stand as is, but it cries out for reorganization, so I refactored it as I suggested above, plus a bit more.

    For convenience, I created an extension on MTLBlitCommandEncoder to encode a copy from an MTLBuffer to Data:

    fileprivate extension MTLBlitCommandEncoder
    {
        /// Encodes a copy of `src` into `dst`, starting at offset 0 in both and spanning
        /// `dst.length` bytes. In DEBUG builds the copy is wrapped in a debug group named
        /// after `dstName`.
        func encodeCopy(
            from src: MTLBuffer,
            to dst: MTLBuffer,
            dstName: @autoclosure () -> String)
        {
            #if DEBUG
            pushDebugGroup("Full \(dstName()) Data Blit")
            #endif

            copy(from: src, sourceOffset: 0, to: dst, destinationOffset: 0, size: dst.length)

            #if DEBUG
            popDebugGroup()
            #endif
        }

        /// Encodes a copy of `src` into the bytes backing `dst` by wrapping those bytes in a
        /// no-copy, shared-storage MTLBuffer. Traps if the buffer cannot be created.
        ///
        /// NOTE(review): the MTLBuffer is built from the pointer vended by
        /// `withUnsafeMutableBytes`, but this method only encodes the copy - the GPU writes
        /// through that pointer later, after the closure has returned. Confirm that `dst`'s
        /// storage stays at the same address until the command buffer has completed.
        func encodeCopy(
            from src: MTLBuffer,
            to dst: inout Data,
            dstName: @autoclosure () -> String)
        {
            dst.withUnsafeMutableBytes
            { (bytes: UnsafeMutableRawBufferPointer) in
                let wrapped = device.makeBuffer(
                    bytesNoCopy: bytes.baseAddress!,
                    length: bytes.count,
                    options: [.storageModeShared],
                    deallocator: nil)

                guard let buffer = wrapped else {
                    fatalError("Failed to allocate MTLBuffer for \(dstName())")
                }

                #if DEBUG
                buffer.label = "\(dstName()) Buffer"
                #endif

                encodeCopy(from: src, to: buffer, dstName: dstName())
            }
        }
    }
    

    I moved the nested functions to fileprivate methods, and changed the custom deallocator from a closure to a static method, renaming it to vm_dealloc:

        /// Custom `Data` deallocator: gives the `length` bytes starting at `bytes` back to
        /// the current task with `vm_deallocate`.
        fileprivate static func vm_dealloc(_ bytes: UnsafeMutableRawPointer, _ length: Int)
        {
            let address = vm_offset_t(bitPattern: bytes)
            let size = vm_size_t(length)
            vm_deallocate(mach_task_self_, address, size)
        }
        
        /// Allocates `count` bytes with `vm_allocate` in the current task.
        /// - Returns: The start of the new region, or `nil` when the call does not report
        ///   `KERN_SUCCESS`.
        fileprivate func vm_alloc(count: Int) -> UnsafeMutableRawPointer?
        {
            var address: vm_address_t = 0
            let status = vm_allocate(mach_task_self_, &address, vm_size_t(count), VM_FLAGS_ANYWHERE)
            guard status == KERN_SUCCESS else { return nil }
            return UnsafeMutableRawPointer(bitPattern: address)
        }
    

    Since the pointer will be stored in an instance of Data anyway, and Data can handle cleanup automatically, I wrote vmAllocData(count:) to allocate the memory and immediately put it in a Data. The calling code doesn't need to worry about the underlying pointer anymore.

        /// Allocates `count` bytes with `vm_alloc(count:)` and wraps them, without copying,
        /// in a `Data` that uses `vm_dealloc` as its custom deallocator.
        /// - Returns: The wrapping `Data`, or `nil` when the allocation fails.
        fileprivate func vmAllocData(count: Int) -> Data?
        {
            vm_alloc(count: count).map { region in
                Data(bytesNoCopy: region, count: count, deallocator: .custom(Self.vm_dealloc))
            }
        }
    

    Then I moved the Metal code to a copy(positionsInto:andVelocitiesInto:) method. Some would quibble with the "and" in the name because it says that it's doing more than one thing, and it is... but it's a matter of efficiency in using the same MTLBlitCommandEncoder to encode copying both positions and velocities. So yeah, it does more than one thing, but the other option is to create the encoder separately and pass it in, which would spread the Metal code out a bit more than is necessary. I think in this case it's OK to do more than one thing for the sake of efficiency and sequestering the Metal code. Anyway, this function uses encodeCopy from the extension above:

        /// Blits the current ("old") position and velocity buffers into the two `Data`
        /// values using one command buffer and one blit encoder, and returns only after
        /// the command buffer has completed. Traps if any Metal object cannot be created
        /// or a source buffer is nil.
        fileprivate func copy(
            positionsInto positionData: inout Data,
            andVelocitiesInto velocityData: inout Data)
        {
            guard let transfer = commandQueue.makeCommandBuffer()
            else { fatalError("Failed to make commandBuffer") }
            #if DEBUG
            transfer.label = "Full Transfer Command Buffer"
            #endif

            guard let encoder = transfer.makeBlitCommandEncoder()
            else { fatalError("Failed to make blitEncoder") }
            #if DEBUG
            encoder.label = "Full Transfer Blits"
            #endif

            // Positions first ...
            guard let positionSrc = positions[oldBufferIndex]
            else { fatalError("positions[\(oldBufferIndex)] is nil!") }
            encoder.encodeCopy(from: positionSrc, to: &positionData, dstName: "Positions")

            // ... then velocities, on the same encoder
            guard let velocitySrc = velocities[oldBufferIndex]
            else { fatalError("velocities[\(oldBufferIndex)] is nil!") }
            encoder.encodeCopy(from: velocitySrc, to: &velocityData, dstName: "Velocity")

            encoder.endEncoding()
            transfer.commit()

            // Block until the blit has finished so callers see complete data
            transfer.waitUntilCompleted()
        }
    

    Then, finally, provide(fullData:forSimulationTime:) becomes:

        /// Allocates transfer storage for the current positions and velocities, fills it
        /// with a GPU blit, and passes both `Data` values plus `time` to `dataProvider`.
        func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
        {
            // Each Data is sized to match its source buffer
            let positionByteCount = positions[oldBufferIndex]!.length
            let velocityByteCount = velocities[oldBufferIndex]!.length

            guard var positionData = vmAllocData(count: positionByteCount)
            else { fatalError("failed to allocate position data") }

            guard var velocityData = vmAllocData(count: velocityByteCount)
            else { fatalError("failed to allocate velocity data") }

            copy(positionsInto: &positionData, andVelocitiesInto: &velocityData)
            dataProvider(positionData, velocityData, time)
        }