rust vector

How to create uninitialized Vec


I've written some code that reads a binary file into a Vec. I did it using Vec::with_capacity() and unsafe { ...set_len(); }. It works, but clippy complains and the official docs claim that reading into an uninitialized vec is undefined behavior. Since the compiler lets it happen, I suspect it's not really that undefined, and I can just shut up clippy with an #[allow(...)], but I'd like to do it "right" if I can. There are lots of examples of using MaybeUninit<> with variables and arrays, but not with Vec, and many of the examples jump through ridiculous hoops for what is a really simple thing. This is a big Vec, so initializing it is a totally unacceptable cost. So what's the "no cost abstraction" way of doing this?

lazy_static! {
/// Map from perfect hash to equivalence class
///
/// The initializer decompresses `ojp_high_mp5_table_1.bin.gz` from the
/// `onejoker` directory under `cargo_home()` and reinterprets the resulting
/// bytes as a `Vec<u16>`.
///
/// # Panics
///
/// Panics if `cargo_home()` fails, the file cannot be opened, a read fails,
/// or the decompressed size is not exactly `(MP5_SIZE + 1) * 2` bytes.
pub static ref OJP_HIGH_MP5_TABLE_1: Vec<u16> = {
    // Build `<cargo home>/onejoker/ojp_high_mp5_table_1.bin.gz`.
    let mut path: PathBuf = cargo_home().unwrap();
    path.push("onejoker");
    path.push("ojp_high_mp5_table_1.bin.gz");

    // Buffered file reader feeding the gzip decoder.
    let file = fs::File::open(&path).unwrap();
    let mut reader = BufReader::new(file);
    let mut decoder = GzDecoder::new(&mut reader);

    // Reserve room for the whole table (two bytes per `u16` entry), then
    // declare all of it live without writing anything to it.
    // NOTE(review): this is the pattern `clippy::uninit_vec` flags; the
    // `Vec::set_len` docs require the newly exposed elements to be initialized,
    // and here they are not until the copy loop below has overwritten them.
    let mut bytes: Vec<u8> = Vec::with_capacity((MP5_SIZE + 1) * 2);
    #[allow(clippy::uninit_vec)]
    unsafe { bytes.set_len((MP5_SIZE + 1) * 2) };
    // Scratch buffer for one `read` call, created without initializing it.
    let mut buf: [MaybeUninit<u8>; IO_BUF_SIZE] =
    unsafe { MaybeUninit::uninit().assume_init() };
    // Offset into `bytes` where the next chunk is copied.
    let mut start: usize = 0;

    // Read chunk by chunk until the decoder reports end of stream (0 bytes).
    loop {
        let n = decoder.read(
            // Views the `MaybeUninit<u8>` array as a plain `[u8; IO_BUF_SIZE]`.
            // NOTE(review): this hands a reference to uninitialized bytes to `read`.
            unsafe { &mut *(buf.as_mut_ptr() as *mut [u8; IO_BUF_SIZE]) }
        ).unwrap();
        if n == 0 {
            break;
        }
        // Copy the `n` bytes just read into place; the range index panics if
        // the stream yields more data than `bytes` has room for.
        bytes[start..start + n].copy_from_slice(
            unsafe { &*(buf.as_ptr() as *const [u8; IO_BUF_SIZE]) }[..n].as_ref()
        );
        start += n;
    }
    // The stream must have produced exactly the expected number of bytes.
    assert_eq!(start, (MP5_SIZE + 1) * 2);

    // Take the byte vector apart and rebuild it as a vector of `u16`:
    // length and capacity are halved because each element is two bytes.
    // NOTE(review): `Vec::from_raw_parts` requires the pointer to come from an
    // allocation made with the alignment of the element type; this allocation
    // was made for `u8`, so that requirement is not guaranteed to hold.
    let len = bytes.len() / 2;
    let capacity = bytes.capacity() / 2;
    let ptr = bytes.as_mut_ptr() as *mut u16;
    std::mem::forget(bytes);    // prevents deallocation of the vec

    unsafe { Vec::from_raw_parts(ptr, len, capacity) }
};

}


Solution

  • As mentioned in the comments by cdhowie and cafce25, you’re mainly doing a Read::read_to_end. Your loop gets the same result as this safe Rust, but the safe Rust actually copies less (there’s no intermediate buf):

    // Same reader stack as in the question: file -> BufReader -> GzDecoder.
    let file = fs::File::open(&path).unwrap();
    let mut reader = BufReader::new(file);
    let mut decoder = GzDecoder::new(&mut reader);
    
    // Reserve the expected size up front; the Vec starts with length 0 and
    // `read_to_end` appends the decoded bytes to it, so no element is ever
    // exposed before it has been written.
    let mut bytes: Vec<u8> = Vec::with_capacity((MP5_SIZE + 1) * 2);
    decoder.read_to_end(&mut bytes).unwrap();
    // The stream must have produced exactly the expected number of bytes.
    assert_eq!(bytes.len(), (MP5_SIZE + 1) * 2);
    

    Exposing this Vec<u8> as u16s is a little more involved, but you can still do it in safe Rust with a crate like bytemuck. A Vec<u8>’s storage only has 1-byte alignment, so as you brought up in a comment, you would have to allocate an extra byte of space to guarantee correctness:

    /// Reads all of `source` into a leaked heap buffer and returns it viewed as
    /// a `'static` array of `N` values of type `T`.
    ///
    /// The buffer is over-allocated by `align_of::<[T; N]>() - 1` bytes and
    /// padded at the front so that the bytes read from `source` begin at an
    /// address suitably aligned for `[T; N]`.
    ///
    /// Note: with a suitable error type, this can return `Result` and use `?`
    /// instead of `unwrap`.
    ///
    /// # Panics
    ///
    /// Panics if reading from `source` fails, or if the bytes read cannot be
    /// viewed as exactly `N` values of `T`.
    fn read_table<T, const N: usize>(mut source: impl Read) -> &'static [T; N]
    where
        T: bytemuck::AnyBitPattern,
    {
        let table_align = std::mem::align_of::<[T; N]>();
        let table_size = std::mem::size_of::<[T; N]>();

        // Room for the table itself plus the worst-case leading padding.
        let mut storage: Vec<u8> = Vec::with_capacity(table_size + (table_align - 1));

        // Zero-fill up to the first aligned address so the payload that
        // follows starts on an aligned boundary.
        let padding = storage.as_ptr().align_offset(table_align);
        storage.resize(padding, 0);

        source.read_to_end(&mut storage).unwrap();

        // Give up ownership of the buffer so the returned reference can be `'static`.
        let leaked: &'static [u8] = storage.leak();
        let elements: &'static [T] = bytemuck::cast_slice(&leaked[padding..]);
        elements.try_into().unwrap()
    }
    
    lazy_static! {

    /// Map from perfect hash to equivalence class, decoded from
    /// `ojp_high_mp5_table_1.bin.gz` in the `onejoker` directory under
    /// `cargo_home()`.
    pub static ref OJP_HIGH_MP5_TABLE_1: &'static [u16; MP5_SIZE + 1] = {
        let path: PathBuf = cargo_home()
            .unwrap()
            .join("onejoker")
            .join("ojp_high_mp5_table_1.bin.gz");

        // Buffered file reader feeding the gzip decoder; `read_table` consumes
        // the decoder and hands back the leaked, aligned table.
        let mut reader = BufReader::new(fs::File::open(&path).unwrap());
        read_table(GzDecoder::new(&mut reader))
    };

    }
    

    playground