rust unsafe maybeuninit

How to drop a MaybeUninit of vector or array which is partially initialized?


I'm looking for information and good practices for using MaybeUninit to directly initialize collections (typically arrays or vectors) and to drop them properly if initialization fails.

Thanks to the API examples, I got arrays working fairly quickly, but vectors were much trickier. In the example below (a toy simplification of what I did in my project), the generic function try_new<T: TryFrom<()>, A: ArrayUninit<T>>(len: usize) tries to create an array or a vector of objects T using the fallible data generator TryFrom::try_from(_: ()) implemented by T. The elements are generated in random order (to mimic asynchronous completion); this is simulated by the function indices(len: usize). To build the uninitialized array or vector, try_new calls the method ArrayUninit::try_uninit(len: usize), which is implemented for Vec<T> and [T; N].

In main, we use the data type Data as an example; it implements the generator TryFrom<()>.

The following code seems to work, but I'm wondering how to drop uninitialized data: (playground)

use core::{ time::Duration, mem::MaybeUninit, };
use std::thread;

use rand::prelude::*;

// trait with method for building uninited array/vector
// implementations for Vec<T> and [T;N] after the main()
//
// `Self` is the finished collection type; the `AsMut<[T]>` supertrait is what
// `try_new` uses to reach the element storage as a slice.
trait ArrayUninit<T>: AsMut<[T]> + Sized { 
    // Returns storage for a collection of `len` elements wrapped in
    // `MaybeUninit<Self>`, or an error message when `len` is not acceptable
    // for `Self` (the array impl rejects `len != N`).
    //
    // NOTE(review): what exactly is uninitialized differs per impl: for
    // `[T; N]` the whole array is uninitialized, while for `Vec<T>` the `Vec`
    // header itself is written and only its elements are uninitialized.
    fn try_uninit(len: usize) -> Result<MaybeUninit<Self>,String>;
}

// Produce every index in `0..len` exactly once, in a random order.
fn indices(len: usize) -> Box<dyn Iterator<Item = usize>> {
    let mut rng = thread_rng();
    let mut order = (0..len).collect::<Vec<usize>>();
    order.shuffle(&mut rng);
    Box::new(order.into_iter())
}

// try to build an array or a vector of objects T
//
// Asks `A::try_uninit` for storage of `len` elements, fills the slots in the
// shuffled order produced by `indices(len)` with values from
// `T::try_from(())`, and returns the finished collection. If any `try_from`
// call fails, filling stops and `Err("failed to init")` is returned; an error
// from `try_uninit` is propagated unchanged.
//
// NOTE(review): several steps below are unsound for a general `T`; they are
// flagged inline.
fn try_new<T: TryFrom<()>, A:ArrayUninit<T>>(len: usize) -> Result<A,String> {
    // build uninitialized collection
    let mut uninited = A::try_uninit(len)?;
    // simulate initialization in random order
    let indices = indices(len);
    // build a mutable ref to the array/vector
    // The reference is made from a raw pointer, so its lifetime is not tied to
    // `uninited`; that is why `uninited` can still be used further down.
    // NOTE(review): for `[T; N]` this is a `&mut` to entirely uninitialized
    // memory (see the array impl of `try_uninit`) - questionable at best.
    let ra: &mut A  = unsafe {(uninited.as_mut_ptr() as *mut A).as_mut() }.unwrap();
    let mut failed = false;
    for i in indices {
        // get ptr at i
        let ptr_arr: * mut T = unsafe{AsMut::<[T]>::as_mut(ra).as_mut_ptr().add(i)};
        // get object and break if failed
        let data = match T::try_from(()) {
            Ok(data) => data, Err(_) => { failed = true; break; },
        };
        // set object
        // NOTE(review): assigning through `*ptr_arr` first drops the value
        // previously stored in the slot, but the slot was never initialized.
        // `ptr_arr.write(data)` stores without dropping the old contents.
        unsafe { *ptr_arr = data };
    }
    if !failed { 
        Ok(unsafe{ uninited.assume_init() }) // return array, if successful
    } else {
        // if failed, then
        // NOTE(review): this loop reads and drops all `len` slots, including
        // the ones the loop above never wrote before it broke out, so
        // uninitialized values are read as `T`.
        for i in 0..len { // drop all objects within array/vector
            let ptr_arr: * mut T = unsafe{AsMut::<[T]>::as_mut(ra).as_mut_ptr().add(i)};
            drop(unsafe { ptr_arr.read() });
        }
        // NOTE(review): dropping a `MaybeUninit` never runs the destructor of
        // its contents, so for `Vec<T>` the heap buffer is not freed here.
        drop(uninited); // and drop uninited array/vector
        Err(format!("failed to init"))
    }
} 

// Object Data: a wrapper around one randomly generated float.
#[derive(Debug)]
struct Data(f64);

impl TryFrom<()> for Data {
    type Error = ();

    // Slow, fallible generator: sleeps 10 ms, draws a random float and
    // rejects any draw above 0.99.
    fn try_from(_: ()) -> Result<Self, ()> {
        thread::sleep(Duration::from_millis(10));
        let sample: f64 = rand::random();
        if sample <= 0.99 {
            return Ok(Data(sample));
        }
        Err(())
    }
}


fn main() {
    // Build one vector and one array at each size; the target collection is
    // selected through the turbofish. Results are random.
    println!("result: {:?}", try_new::<Data, Vec<Data>>(3));
    println!("result: {:?}", try_new::<Data, [Data; 3]>(3));
    println!("result: {:?}", try_new::<Data, Vec<Data>>(1000));
    println!("result: {:?}", try_new::<Data, [Data; 1000]>(1000));
}


impl<T> ArrayUninit<T> for Vec<T> {
    // Allocates room for `len` elements and reports `len` as the length
    // without initializing any element; never fails.
    fn try_uninit(len: usize) -> Result<MaybeUninit<Self>, String> {
        let mut buffer: Vec<T> = Vec::with_capacity(len);
        // NOTE(review): `set_len` requires the elements up to the new length
        // to be initialized, which is not the case here; the caller has to
        // write every slot before reading or dropping the vector.
        unsafe { buffer.set_len(len) };
        Ok(MaybeUninit::new(buffer))
    }
}
impl<T, const N: usize> ArrayUninit<T> for [T; N] {
    // Hands back fully uninitialized storage for the array, provided the
    // requested length matches the array size `N`.
    fn try_uninit(len: usize) -> Result<MaybeUninit<Self>, String> {
        if len != N {
            return Err(String::from("len differs from array size"));
        }
        Ok(MaybeUninit::uninit())
    }
}

Here is an example of run (results are random):

Standard Error

   Compiling playground v0.0.1 (/playground)
    Finished dev [unoptimized + debuginfo] target(s) in 0.84s
     Running `target/debug/playground`

Standard Output

result: Ok([Data(0.9778296353515407), Data(0.9319034033060891), Data(0.11046580243682291)])
result: Ok([Data(0.749182522350767), Data(0.5432451150541627), Data(0.6840763419767837)])
result: Err("failed to init")
result: Err("failed to init")

For now, in case of failure, I drop every element slot of the array/vector, both initialized and uninitialized, and then drop the array/vector itself. It seems to work, but I'm surprised that one can also drop uninitialized data.

Can anyone confirm whether this is the right approach to dropping uninitialized data? If not, what are the rules to follow?

[EDIT]:
Thanks to the remarks of @isaactfa and @Chayim, I updated the code as follows (playground):

use core::{ time::Duration, mem::MaybeUninit, };
use std::thread;

use rand::prelude::*;

// trait with method for building uninited array/vector
// implementations for Vec<T> and [T;N] after the main()
//
// `Self` is the finished collection; `Uninited` is the staging state used
// while it is being filled. `try_new` drives the lifecycle: `try_uninit`,
// then `set` once per index, then either `finalize` (success) or
// `destructor` (failure).
//
// NOTE(review): the caller obligations listed on the unsafe methods are
// inferred from how `try_new` and the two impls in this file use them; they
// are not spelled out by the original author.
trait ArrayUninit<T>: AsMut<[T]> + Sized {
    // Staging state; both impls pair the element storage with one `bool`
    // per slot recording whether that slot has been written.
    type Uninited: Sized;
    // Creates staging state for `len` elements, or an error message when
    // `len` is not acceptable for `Self` (the array impl rejects `len != N`).
    fn try_uninit(len: usize) -> Result<Self::Uninited,String>;
    // Stores `t` in slot `i` and marks the slot as written.
    // Caller: `i` must be below the `len` given to `try_uninit`, and each
    // slot should be set at most once.
    unsafe fn set(uninit: &mut Self::Uninited, i: usize, t: T);
    // Drops the values of all slots marked as written (failure path).
    // Caller: call at most once, and do not call `finalize` afterwards.
    unsafe fn destructor(uninit: &mut Self::Uninited,);
    // Converts the staging state into the finished collection.
    // Caller: every slot must have been written with `set`.
    unsafe fn finalize(uninit: Self::Uninited) -> Self;
}

// Produce every index in `0..len` exactly once, in a random order.
fn indices(len: usize) -> Box<dyn Iterator<Item = usize>> {
    let mut rng = thread_rng();
    let mut order = (0..len).collect::<Vec<usize>>();
    order.shuffle(&mut rng);
    Box::new(order.into_iter())
}

// Build an array or a vector of `len` objects `T`, filling the slots in a
// shuffled order. The first generator failure aborts the build: everything
// written so far is released through `A::destructor` and an error is returned.
fn try_new<T: TryFrom<()>, A: ArrayUninit<T>>(len: usize) -> Result<A, String> {
    // Staging storage; an unacceptable `len` is reported by `try_uninit`.
    let mut staging = A::try_uninit(len)?;

    // Visit the slots in random order to simulate out-of-order completion.
    for slot in indices(len) {
        match T::try_from(()) {
            Ok(value) => unsafe { A::set(&mut staging, slot, value) },
            Err(_) => {
                // Release the values stored so far before bailing out.
                unsafe { A::destructor(&mut staging) };
                return Err(String::from("failed to init"));
            }
        }
    }

    // Every index in `0..len` has been visited, so all slots are written.
    Ok(unsafe { A::finalize(staging) })
}

// Object Data: a wrapper around a heap-allocated string, so that dropping
// an element actually runs a destructor.
#[derive(Debug)]
struct Data(String);

impl TryFrom<()> for Data {
    type Error = ();

    // Slow, fallible generator: sleeps 10 ms, draws a random float, rejects
    // any draw above 0.99 and otherwise formats the draw into a string.
    fn try_from(_: ()) -> Result<Self, ()> {
        thread::sleep(Duration::from_millis(10));
        let sample: f32 = rand::random();
        if sample <= 0.99 {
            return Ok(Data(format!("Value = {}", sample)));
        }
        Err(())
    }
}


fn main() {
    let result: Result<Vec<Data>,_> = try_new(3);
    println!("result: {:?}",result);
    let result: Result<[Data;3],_> = try_new(3);
    println!("result: {:?}",result);
    let result: Result<Vec<Data>,_> = try_new(3);
    println!("result: {:?}",result);
    let result: Result<[Data;3],_> = try_new(3);
    println!("result: {:?}",result);
    let result: Result<Vec<Data>,_> = try_new(1000);
    println!("result: {:?}",result);
    let result: Result<[Data;1000],_> = try_new(1000);
    println!("result: {:?}",result);
    let result: Result<Vec<Data>,_> = try_new(1000);
    println!("result: {:?}",result);
    let result: Result<[Data;1000],_> = try_new(1000);
    println!("result: {:?}",result);
}


impl<T> ArrayUninit<T> for Vec<T> {
    // Staging state: a `Vec<T>` whose length stays 0 while it is being filled
    // (so dropping it only frees the buffer, never an element), paired with
    // one flag per slot that records whether the slot holds a value.
    type Uninited = (Vec<T>, Vec<bool>);

    // Allocates room for `len` elements with every slot marked unwritten.
    // Never fails for `Vec`.
    fn try_uninit(len: usize) -> Result<Self::Uninited, String> {
        Ok((Vec::with_capacity(len), vec![false; len]))
    }

    // Stores `t` in slot `i`. Panics if `i` is out of range. A value already
    // stored in the slot is dropped first instead of being leaked.
    //
    // Safety: `uninit` must come from `try_uninit` and only have been modified
    // through the methods of this impl.
    unsafe fn set((uninit, flag): &mut Self::Uninited, i: usize, t: T) {
        // The bounds-checked flag access comes first, so an out-of-range `i`
        // panics before any raw write can happen.
        let occupied = flag[i];
        // SAFETY: `i < flag.len()`, and the buffer was allocated with a
        // capacity of at least `flag.len()` elements.
        let slot = unsafe { uninit.as_mut_ptr().add(i) };
        if occupied {
            // Clear the flag before dropping so that a panicking destructor
            // cannot lead to a second drop of the same value later on.
            flag[i] = false;
            // SAFETY: the flag said this slot holds an initialized value.
            unsafe { std::ptr::drop_in_place(slot) };
        }
        // SAFETY: `slot` is in bounds and currently holds no live value.
        unsafe { slot.write(t) };
        flag[i] = true;
    }

    // Drops every stored value and marks its slot unwritten again, which
    // makes a repeated call a no-op instead of a double drop.
    //
    // Safety: same requirement on `uninit` as for `set`.
    unsafe fn destructor((uninit, flag): &mut Self::Uninited) {
        let base = uninit.as_mut_ptr();
        for (i, occupied) in flag.iter_mut().enumerate() {
            if std::mem::replace(occupied, false) {
                // SAFETY: the slot was flagged as initialized and is within
                // the allocated capacity.
                unsafe { std::ptr::drop_in_place(base.add(i)) };
            }
        }
    }

    // Turns the staging state into the finished vector.
    //
    // Safety: every slot must have been written with `set`.
    unsafe fn finalize((mut uninit, flag): Self::Uninited) -> Self {
        debug_assert!(
            flag.iter().all(|&occupied| occupied),
            "finalize called before every slot was set"
        );
        // SAFETY: the caller guarantees all `flag.len()` slots are
        // initialized, and the capacity is at least `flag.len()`.
        unsafe { uninit.set_len(flag.len()) };
        uninit
    }
}
impl<T, const N: usize> ArrayUninit<T> for [T; N] {
    // Staging state: one `MaybeUninit<T>` per element, paired with one flag
    // per slot that records whether the slot holds a value.
    type Uninited = ([MaybeUninit<T>; N], [bool; N]);

    // Creates staging storage with every slot marked unwritten.
    // Fails when `len` differs from the array size `N`.
    fn try_uninit(len: usize) -> Result<Self::Uninited, String> {
        if len != N {
            return Err(format!("len differs from array size"));
        }
        // SAFETY: an array of `MaybeUninit<T>` has no initialization
        // requirement, so "assuming" it initialized is sound.
        let uninit = unsafe { MaybeUninit::<[MaybeUninit<T>; N]>::uninit().assume_init() };
        Ok((uninit, [false; N]))
    }

    // Stores `t` in slot `i`. Panics if `i` is out of range. A value already
    // stored in the slot is dropped first instead of being leaked.
    //
    // Safety: `uninit` must come from `try_uninit` and only have been modified
    // through the methods of this impl.
    unsafe fn set((uninit, flag): &mut Self::Uninited, i: usize, t: T) {
        if flag[i] {
            // Clear the flag before dropping so that a panicking destructor
            // cannot lead to a second drop of the same value later on.
            flag[i] = false;
            // SAFETY: the flag said this slot holds an initialized value.
            unsafe { std::ptr::drop_in_place(uninit[i].as_mut_ptr()) };
        }
        uninit[i].write(t);
        flag[i] = true;
    }

    // Drops every stored value and marks its slot unwritten again, which
    // makes a repeated call a no-op instead of a double drop.
    //
    // Safety: same requirement on `uninit` as for `set`.
    unsafe fn destructor((uninit, flag): &mut Self::Uninited) {
        for (slot, occupied) in uninit.iter_mut().zip(flag.iter_mut()) {
            if std::mem::replace(occupied, false) {
                // SAFETY: the slot was flagged as initialized.
                unsafe { std::ptr::drop_in_place(slot.as_mut_ptr()) };
            }
        }
    }

    // Turns the staging state into the finished array.
    //
    // Safety: every slot must have been written with `set`.
    unsafe fn finalize((uninit, flag): Self::Uninited) -> Self {
        debug_assert!(
            flag.iter().all(|&occupied| occupied),
            "finalize called before every slot was set"
        );
        // SAFETY: `[MaybeUninit<T>; N]` has the same layout as `[T; N]`, and
        // the caller guarantees every element is initialized. Dropping
        // `uninit` afterwards runs no destructor, so each value is owned
        // exactly once, by the returned array.
        unsafe { (&uninit as *const [MaybeUninit<T>; N] as *const Self).read() }
    }
}

The idea here is to use specific approaches for arrays and vecs, which are encoded within trait ArrayUninit. MaybeUninit is used only for arrays, while it is not needed for vecs.


Solution

  • Your code contains multiple points of UB: