I am creating paged aligned memory with memory_align, then I create a MTLBuffer from that with no copy. The GPU then blits data into that MTLBuffer. When that completes, I wrap that same memory in Data with Data.init(bytesNoCopy:count:deallocator:) to pass on in my project. I don't know what to use as the deallocator. I translating this code from an Apple tutorial written in OBJ-C. The Apple code is here. I spent two days trying to understand this researching myself.
The Apple OBJ-C code deallocator looks like this. This is beyond my OBJ-C knowledge.
// Block to dealloc memory created with vm_allocate
void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
^(void *bytes, NSUInteger length)
{
vm_deallocate((vm_map_t)mach_task_self(),
(vm_address_t)bytes,
length);
};
The code in question is towards the end of my listing.
// Blit all positions and velocities and provide them to the client either to show final results
// or continue the simulation on another device
func provideFullData(
_ dataProvider: AAPLFullDatasetProvider,
forSimulationTime time: CFAbsoluteTime
) {
let positionDataSize = positions[oldBufferIndex]!.length
let velocityDataSize = velocities[oldBufferIndex]!.length
var positionDataAddress: UnsafeMutableRawPointer? = nil
var velocityDataAddress: UnsafeMutableRawPointer? = nil
// Create buffers to transfer data to client
do {
// allocate memory on page aligned addresses use by both GPU and CPU
let alignment = 0x4000
// make length a mulitple of alignment
let positionAllocationSize = (positionDataSize + alignment - 1) & (~(alignment - 1))
posix_memalign(&positionDataAddress, alignment, positionAllocationSize)
let velocityAllocationSize = (velocityDataSize + alignment - 1) & (~(alignment - 1))
posix_memalign(&positionDataAddress, alignment, velocityAllocationSize)
}
// Blit positions and velocities to a buffer for transfer
do {
// create MTL buffers with created mem allighed
let positionBuffer = device.makeBuffer(
bytesNoCopy: &positionDataAddress,
length: positionDataSize,
options: .storageModeShared,
deallocator: nil)
positionBuffer?.label = "Final Positions Buffer"
let velocityBuffer = device.makeBuffer(
bytesNoCopy: &velocityDataAddress,
length: velocityDataSize,
options: .storageModeShared,
deallocator: nil)
velocityBuffer?.label = "Final Velocities Buffer"
let commandBuffer = commandQueue?.makeCommandBuffer()
commandBuffer?.label = "Full Transfer Command Buffer"
let blitEncoder = commandBuffer?.makeBlitCommandEncoder()
blitEncoder?.label = "Full Transfer Blits"
blitEncoder?.pushDebugGroup("Full Position Data Blit")
if let _position = positions[oldBufferIndex], let positionBuffer {
blitEncoder?.copy(
from: _position,
sourceOffset: 0,
to: positionBuffer,
destinationOffset: 0,
size: positionBuffer.length)
}
blitEncoder?.popDebugGroup()
blitEncoder?.pushDebugGroup("Full Velocity Data Blit")
if let _velocity = velocities[oldBufferIndex], let velocityBuffer {
blitEncoder?.copy(
from: _velocity,
sourceOffset: 0,
to: velocityBuffer,
destinationOffset: 0,
size: velocityBuffer.length)
}
blitEncoder?.popDebugGroup()
blitEncoder?.endEncoding()
commandBuffer?.commit()
// Ensure blit of data is complete before providing
// the data to the client
commandBuffer?.waitUntilCompleted()
}
// Wrap the memory allocated with vm_allocate
// with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the
// memory's lifetime. Initialize NSData object
// with a deallocation block to free the
// vm_allocated memory when the object has been
// deallocated
do {
//this code was in obj-c I don'tlnow how to convert this to swift
// Block to dealloc memory created with vm_allocate
// let deallocProvidedAddress: ((_ bytes: UnsafeMutableRawPointer?, _ length: Int) -> Void)? =
// { bytes, length in
// vm_deallocate(
// mach_task_self() as? vm_map_t,
// bytes as? vm_address_t,
// length)
// }
let positionData = Data(
bytesNoCopy: &positionDataAddress,
count: positionDataSize,
deallocator: .none) // this may be a memory leak
let velocityData = Data(
bytesNoCopy: &velocityDataAddress,
count: velocityDataSize,
deallocator: .none) // this may be a memory leak
dataProvider(positionData, velocityData, time)
}
}
Here is the listing for the Apple OBJ-C code
// Set the initial positions and velocities of the simulation based upon the simulation's config
- (void)initializeData
{
const float pscale = _config->clusterScale;
const float vscale = _config->velocityScale * pscale;
const float inner = 2.5f * pscale;
const float outer = 4.0f * pscale;
const float length = outer - inner;
_oldBufferIndex = 0;
_newBufferIndex = 1;
vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;
for(int i = 0; i < _config->numBodies; i++)
{
vector_float3 nrpos = generate_random_normalized_vector(-1.0, 1.0, 1.0);
vector_float3 rpos = generate_random_vector(0.0, 1.0);
vector_float3 position = nrpos * (inner + (length * rpos));
positions[i].xyz = position;
positions[i].w = 1.0;
vector_float3 axis = {0.0, 0.0, 1.0};
float scalar = vector_dot(nrpos, axis);
if((1.0f - scalar) < 1e-6)
{
axis.xy = nrpos.yx;
axis = vector_normalize(axis);
}
vector_float3 velocity = vector_cross(position, axis);
velocities[i].xyz = velocity * vscale;
}
NSRange fullRange;
fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
[_positions[_oldBufferIndex] didModifyRange:fullRange];
fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
[_velocities[_oldBufferIndex] didModifyRange:fullRange];
}
/// Set simulation data for a simulation that was begun elsewhere (i.e. on another device)
- (void)setPositionData:(nonnull NSData *)positionData
velocityData:(nonnull NSData *)velocityData
forSimulationTime:(CFAbsoluteTime)simulationTime
{
_oldBufferIndex = 0;
_newBufferIndex = 1;
vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;
assert(_positions[_oldBufferIndex].length == positionData.length);
assert(_velocities[_oldBufferIndex].length == velocityData.length);
memcpy(positions, positionData.bytes, positionData.length);
memcpy(velocities, velocityData.bytes, velocityData.length);
NSRange fullRange;
fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
[_positions[_oldBufferIndex] didModifyRange:fullRange];
fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
[_velocities[_oldBufferIndex] didModifyRange:fullRange];
_simulationTime = simulationTime;
}
/// Blit a subset of the positions data for this frame and provide them to the client
/// to show a summary of the simulation's progress
- (void)fillUpdateBufferWithPositionBuffer:(nonnull id<MTLBuffer>)buffer
usingCommandBuffer:(nonnull id<MTLCommandBuffer>)commandBuffer
{
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
blitEncoder.label = @"Position Update Blit Encoder";
[blitEncoder pushDebugGroup:@"Position Update Blit Commands"];
[blitEncoder copyFromBuffer:buffer
sourceOffset:0
toBuffer:_updateBuffer[_currentBufferIndex]
destinationOffset:0
size:_updateBuffer[_currentBufferIndex].length];
[blitEncoder popDebugGroup];
[blitEncoder endEncoding];
}
/// Blit all positions and velocities and provide them to the client either to show final results
/// or continue the simulation on another device
- (void)provideFullData:(nonnull AAPLFullDatasetProvider)dataProvider
forSimulationTime:(CFAbsoluteTime)time
{
NSUInteger positionDataSize = _positions[_oldBufferIndex].length;
NSUInteger velocityDataSize = _velocities[_oldBufferIndex].length;
void *positionDataAddress = NULL;
void *velocityDataAddress = NULL;
// Create buffers to transfer data to client
{
// Use vm allocate to allocate buffer on page aligned address
kern_return_t err;
err = vm_allocate((vm_map_t)mach_task_self(),
(vm_address_t*)&positionDataAddress,
positionDataSize,
VM_FLAGS_ANYWHERE);
assert(err == KERN_SUCCESS);
err = vm_allocate((vm_map_t)mach_task_self(),
(vm_address_t*)&velocityDataAddress,
velocityDataSize,
VM_FLAGS_ANYWHERE);
assert(err == KERN_SUCCESS);
}
// Blit positions and velocities to a buffer for transfer
{
id<MTLBuffer> positionBuffer = [_device newBufferWithBytesNoCopy:positionDataAddress
length:positionDataSize
options:MTLResourceStorageModeShared
deallocator:nil];
positionBuffer.label = @"Final Positions Buffer";
id<MTLBuffer> velocityBuffer = [_device newBufferWithBytesNoCopy:velocityDataAddress
length:velocityDataSize
options:MTLResourceStorageModeShared
deallocator:nil];
velocityBuffer.label = @"Final Velocities Buffer";
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
commandBuffer.label = @"Full Transfer Command Buffer";
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
blitEncoder.label = @"Full Transfer Blits";
[blitEncoder pushDebugGroup:@"Full Position Data Blit"];
[blitEncoder copyFromBuffer:_positions[_oldBufferIndex]
sourceOffset:0
toBuffer:positionBuffer
destinationOffset:0
size:positionBuffer.length];
[blitEncoder popDebugGroup];
[blitEncoder pushDebugGroup:@"Full Velocity Data Blit"];
[blitEncoder copyFromBuffer:_velocities[_oldBufferIndex]
sourceOffset:0
toBuffer:velocityBuffer
destinationOffset:0
size:velocityBuffer.length];
[blitEncoder popDebugGroup];
[blitEncoder endEncoding];
[commandBuffer commit];
// Ensure blit of data is complete before providing the data to the client
[commandBuffer waitUntilCompleted];
}
// Wrap the memory allocated with vm_allocate with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the memory's lifetime. Initialize NSData object
// with a deallocation block to free the vm_allocated memory when the object has been
// deallocated
{
// Block to dealloc memory created with vm_allocate
void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
^(void *bytes, NSUInteger length)
{
vm_deallocate((vm_map_t)mach_task_self(),
(vm_address_t)bytes,
length);
};
NSData *positionData = [[NSData alloc] initWithBytesNoCopy:positionDataAddress
length:positionDataSize
deallocator:deallocProvidedAddress];
NSData *velocityData = [[NSData alloc] initWithBytesNoCopy:velocityDataAddress
length:velocityDataSize
deallocator:deallocProvidedAddress];
dataProvider(positionData, velocityData, time);
}
}
You define the deallocation block (or even a named function) similar to the way its done in Obj-C, though some casting is needed. The Obj-C deallocator block becomes the following closure in Swift:
let deallocProvidedAddress = {
(_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
vm_deallocate(mach_task_self_, vm_offset_t(bitPattern: bytes), vm_size_t(length))
}
Then instead of .none
for the deallocator
parameter for Data(bytesNoCopy:count:deallocator)
, you pass .custom(deallocProvidedAddress)
.
let positionData = Data(
bytesNoCopy: &positionDataAddress,
count: positionDataSize,
deallocator: .custom(deallocProvidedAddress))
let velocityData = Data(
bytesNoCopy: &velocityDataAddress,
count: velocityDataSize,
deallocator: .custom(deallocProvidedAddress))
dataProvider(positionData, velocityData, time)
However, since you don't call vm_allocate
, but instead use posix_memalign
, you'd need to call free
instead of vm_deallocate
in deallocProvidedAddress
:
let deallocProvidedAddress = {
(_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
free(bytes)
}
How did I know to use free
? Having never actually used posix_memalign
myself, I just did man posix_memalign
in Terminal, and it says, among other things:
Memory that is allocated via posix_memalign() can be used as an argument in subsequent calls to realloc(3), reallocf(3), and free(3).
So free
is the appropriate way to deallocate memory allocated via posix_memalign
This is my translation of the Obj-C version of provideFullData
into Swift. It uses vm_allocate
and vm_deallocate
since that's what the Obj-C version does, but you can easily replace that with posix_memalign
and free
, if you like:
/// Blit all positions and velocities and provide them to the client either to show final results
/// or continue the simulation on another device
func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
{
let positionDataSize = positions[oldBufferIndex]!.length
let velocityDataSize = velocities[oldBufferIndex]!.length
func vm_alloc(count: Int) -> UnsafeMutableRawPointer?
{
var address: vm_address_t = 0
let err = vm_allocate(mach_task_self_, &address, vm_size_t(count), VM_FLAGS_ANYWHERE)
return err == KERN_SUCCESS
? UnsafeMutableRawPointer(bitPattern: address)
: nil
}
func makeMTLBuffer(
from bytes: UnsafeMutableRawPointer,
count: Int,
labeled label: String) -> MTLBuffer?
{
guard let buffer = device.makeBuffer(
bytesNoCopy: bytes,
length: count,
options: [.storageModeShared],
deallocator: nil)
else { return nil }
buffer.label = label
return buffer
}
guard let positionDataAddress = vm_alloc(count: positionDataSize) else {
fatalError("failed to allocate position data")
}
guard let velocityDataAddress = vm_alloc(count: velocityDataSize) else {
fatalError("failed to allocate velocity data")
}
// Blit positions and velocities to a buffer for transfer
guard let positionBuffer = makeMTLBuffer(
from: positionDataAddress,
count: positionDataSize,
labeled: "Final Positions Buffer")
else { fatalError("Failed to allocate positions MTLBuffer") }
guard let velocityBuffer = makeMTLBuffer(
from: velocityDataAddress,
count: velocityDataSize,
labeled: "Final Velocities Buffer")
else { fatalError("Failed to allocate velocities MTLBuffer") }
guard let commandBuffer = commandQueue.makeCommandBuffer() else {
fatalError("Failed to make commandBuffer")
}
commandBuffer.label = "Full Transfer Command Buffer"
guard let blitEncoder = commandBuffer.makeBlitCommandEncoder() else {
fatalError("Failed to make blitEncoder")
}
blitEncoder.label = "Full Transfer Blits"
blitEncoder.pushDebugGroup("Full Position Data Blit")
blitEncoder.copy(
from: positions[oldBufferIndex]!,
sourceOffset: 0,
to: positionBuffer,
destinationOffset: 0,
size: positionBuffer.length
)
blitEncoder.popDebugGroup()
blitEncoder.pushDebugGroup("Full Velocity Data Blit")
blitEncoder.copy(
from: velocities[oldBufferIndex]!,
sourceOffset: 0,
to: velocityBuffer,
destinationOffset: 0,
size: velocityBuffer.length
)
blitEncoder.popDebugGroup()
blitEncoder.endEncoding()
commandBuffer.commit()
// Ensure blit of data is complete before providing the data to the client
commandBuffer.waitUntilCompleted()
// Wrap the memory allocated with vm_allocate with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the memory's lifetime. Initialize NSData object
// with a deallocation block to free the vm_allocated memory when the object has been
// deallocated
// Block to dealloc memory created with vm_allocate
let deallocProvidedAddress =
{ (_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
vm_deallocate(
mach_task_self_,
vm_offset_t(bitPattern: bytes),
vm_size_t(length)
)
}
let positionData = Data(
bytesNoCopy: positionDataAddress,
count: positionDataSize,
deallocator: .custom(deallocProvidedAddress))
let velocityData = Data(
bytesNoCopy: velocityDataAddress,
count: velocityDataSize,
deallocator: .custom(deallocProvidedAddress))
dataProvider(positionData, velocityData, time)
}
I see lots of opportunities for refactoring here (I already did a little bit). If you do something other than fatalError
in the "sad" path, don't forget that you need to deallocate positionDataAddress
and velocityDataAddress
before returning or throwing. I would at least refactor it so that each Data
instance is made immediately after its successful vm_allocate
/posix_memalign
instead of waiting until the very end of the method, that way, in case of errors, clean up can happen automatically. I'd also extract all the Metal blit code into it's own function.
I was originally going to let the above version stand as is, but it cries out for reorganization, so I refactored it as I suggested above, plus a bit more.
For convenience, I created an extension on MTLBlitCommandEncoder
to encode a copy from an MTLBuffer
to Data
:
fileprivate extension MTLBlitCommandEncoder
{
func encodeCopy(
from src: MTLBuffer,
to dst: MTLBuffer,
dstName: @autoclosure () -> String)
{
#if DEBUG
pushDebugGroup("Full \(dstName()) Data Blit")
defer { popDebugGroup() }
#endif
copy(
from: src, sourceOffset: 0,
to: dst, destinationOffset: 0,
size: dst.length
)
}
func encodeCopy(
from src: MTLBuffer,
to dst: inout Data,
dstName: @autoclosure () -> String)
{
dst.withUnsafeMutableBytes
{
guard let buffer = device.makeBuffer(
bytesNoCopy: $0.baseAddress!,
length: $0.count,
options: [.storageModeShared],
deallocator: nil)
else { fatalError("Failed to allocate MTLBuffer for \(dstName())") }
#if DEBUG
buffer.label = "\(dstName()) Buffer"
#endif
encodeCopy(from: src, to: buffer, dstName: dstName())
}
}
}
I moved nested functions to fileprivate
methods, and changed from a closure
for the custom deallocator to static
method, renaming it to vm_dealloc
:
fileprivate static func vm_dealloc(
_ bytes: UnsafeMutableRawPointer,
_ length: Int)
{
vm_deallocate(
mach_task_self_,
vm_offset_t(bitPattern: bytes),
vm_size_t(length)
)
}
fileprivate func vm_alloc(count: Int) -> UnsafeMutableRawPointer?
{
var address: vm_address_t = 0
let err = vm_allocate(mach_task_self_, &address, vm_size_t(count), VM_FLAGS_ANYWHERE)
return err == KERN_SUCCESS
? UnsafeMutableRawPointer(bitPattern: address)
: nil
}
Since the pointer will be stored in an instance of Data
anyway, and Data
can handle clean up automatically, I write vmAllocData(count:)
to allocate the memory, and then immediately put it in a Data
. The calling code doesn't need to worry about the underlying pointer anymore.
fileprivate func vmAllocData(count: Int) -> Data?
{
guard let ptr = vm_alloc(count: count) else {
return nil
}
return Data(
bytesNoCopy: ptr,
count: count,
deallocator: .custom(Self.vm_dealloc)
)
}
Then I move the Metal code to a copy(positionsInto:andVelicitiesInto:)
method. Some would quibble with the "and" in the name because it says that it's doing more than one thing, and it is... but it's matter of efficiency in using the same MTLBlitCommandEncoder
to encode copying both positions and velocities. So yeah, it does more than one thing, but the other option is to create the encoder separately and pass it in which would spread the Metal code out a bit more than is necessary. I think in this case it's OK to do more than one thing for the sake of efficiency and sequestering the Metal code. Anyway, this function uses encodeCopy
from the extension
above:
fileprivate func copy(
positionsInto positionData: inout Data,
andVelocitiesInto velocityData: inout Data)
{
guard let commandBuffer = commandQueue.makeCommandBuffer() else {
fatalError("Failed to make commandBuffer")
}
#if DEBUG
commandBuffer.label = "Full Transfer Command Buffer"
#endif
guard let blitEncoder = commandBuffer.makeBlitCommandEncoder() else {
fatalError("Failed to make blitEncoder")
}
#if DEBUG
blitEncoder.label = "Full Transfer Blits"
#endif
guard let positionSrc = positions[oldBufferIndex] else {
fatalError("positions[\(oldBufferIndex)] is nil!")
}
blitEncoder.encodeCopy(
from: positionSrc,
to: &positionData,
dstName: "Positions"
)
guard let velocitySrc = velocities[oldBufferIndex] else {
fatalError("velocities[\(oldBufferIndex)] is nil!")
}
blitEncoder.encodeCopy(
from: velocitySrc,
to: &velocityData,
dstName: "Velocity"
)
blitEncoder.endEncoding()
commandBuffer.commit()
// Ensure blit of data is complete before providing the data to the client
commandBuffer.waitUntilCompleted()
}
Then finally provide(fullData:forSimulationTime)
becomes:
func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
{
let positionDataSize = positions[oldBufferIndex]!.length
let velocityDataSize = velocities[oldBufferIndex]!.length
guard var positionData = vmAllocData(count: positionDataSize) else {
fatalError("failed to allocate position data")
}
guard var velocityData = vmAllocData(count: velocityDataSize) else {
fatalError("failed to allocate velocity data")
}
copy(positionsInto: &positionData, andVelocitiesInto: &velocityData)
dataProvider(positionData, velocityData, time)
}