In libaio, the `io_prep_pwritev` helper prepares the command and fills in the iocb struct. Isn't `iocb->u.c.nbytes` supposed to be the number of bytes? In `io_prep_pwritev`, it is set to `iovcnt`, which doesn't seem correct. Why is that?
It's correct, but it looks strange. The same ABI is shared by PWRITE and PWRITEV. For PWRITE, it works how you'd expect, with `buf` set to the data buffer and `nbytes` set to the number of bytes:
/*
 * Prepare @iocb as an IO_CMD_PWRITE request on descriptor @fd.
 *
 * The control block is cleared in full and then populated: u.c.buf receives
 * the data buffer @buf, u.c.nbytes receives the byte count @count, and
 * u.c.offset receives @offset.
 */
static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf,
				  size_t count, long long offset)
{
	/* Begin from an all-zero control block so every unset field is 0. */
	memset(iocb, 0, sizeof *iocb);

	/* Request header. */
	iocb->aio_lio_opcode = IO_CMD_PWRITE;
	iocb->aio_fildes = fd;
	iocb->aio_reqprio = 0;

	/* Single-buffer payload: pointer, length in bytes, offset. */
	iocb->u.c.offset = offset;
	iocb->u.c.nbytes = count;
	iocb->u.c.buf = buf;
}
But for PWRITEV, the same ABI fields are repurposed to pass the iovec array and its element count (not a byte count) into the kernel:
/*
 * Prepare @iocb as an IO_CMD_PWRITEV request on descriptor @fd.
 *
 * The control block is cleared in full and then populated. The fields used
 * for a plain write are reused here with a different meaning: u.c.buf holds
 * the address of the @iov array, and u.c.nbytes holds @iovcnt, i.e. the
 * number of iovec entries rather than a byte count. u.c.offset receives
 * @offset.
 */
static inline void io_prep_pwritev(struct iocb *iocb, int fd,
				   const struct iovec *iov, int iovcnt,
				   long long offset)
{
	/* Begin from an all-zero control block so every unset field is 0. */
	memset(iocb, 0, sizeof *iocb);

	/* Request header. */
	iocb->aio_lio_opcode = IO_CMD_PWRITEV;
	iocb->aio_fildes = fd;
	iocb->aio_reqprio = 0;

	/* Vectored payload: iovec array pointer, entry count, offset. */
	iocb->u.c.offset = offset;
	iocb->u.c.nbytes = iovcnt;
	iocb->u.c.buf = (void *)iov;	/* cast drops const to fit the field */
}
Then, inside the kernel in `fs/aio.c`, it switches on whether the I/O operation is vectored. In the non-vectored case, it decodes the args as a single buffer pointer and a number of bytes. In the vectored case, it calls `import_iovec` (in `lib/iov_iter.c`) to decode the `buf` and `nbytes` args as an iovec array and the number of entries in that array.
Here's where it decodes the opcode; note the booleans indicating vectored operations:
/*
 * Dispatch on the opcode stored in the iocb. For the four read/write
 * cases the third argument is the "vectored" flag: false for
 * PREAD/PWRITE, true for PREADV/PWRITEV.
 */
switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
return aio_read(&req->rw, iocb, false, compat);
case IOCB_CMD_PWRITE:
return aio_write(&req->rw, iocb, false, compat);
case IOCB_CMD_PREADV:
return aio_read(&req->rw, iocb, true, compat);
case IOCB_CMD_PWRITEV:
return aio_write(&req->rw, iocb, true, compat);
/*
 * NOTE(review): the bool passed to aio_fsync() is not a "vectored" flag;
 * it presumably selects data-only sync for FDSYNC - confirm against
 * aio_fsync().
 */
case IOCB_CMD_FSYNC:
return aio_fsync(&req->fsync, iocb, false);
case IOCB_CMD_FDSYNC:
return aio_fsync(&req->fsync, iocb, true);
case IOCB_CMD_POLL:
return aio_poll(req, iocb);
default:
/* Unknown opcode: log it at debug level and reject the request. */
pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
return -EINVAL;
}
And here's where it decodes the args:
/*
 * Set up @iter from the buffer fields of @iocb.
 *
 * @rw:       passed through unchanged to the import helper.
 * @iocb:     request whose aio_buf / aio_nbytes fields are decoded.
 * @iovec:    in/out iovec pointer; set to NULL on the non-vectored path.
 * @vectored: when false, aio_buf/aio_nbytes are treated as one buffer and
 *            its length; when true, they are handed to the iovec import
 *            helper as the user iovec array and its entry count.
 * @compat:   when true (and CONFIG_COMPAT is enabled), the compat variant
 *            of the iovec import helper is used.
 * @iter:     iterator filled in by the import helper.
 *
 * Returns whatever the selected import helper returns.
 */
static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
		struct iovec **iovec, bool vectored, bool compat,
		struct iov_iter *iter)
{
	size_t len = iocb->aio_nbytes;
	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
	ssize_t ret;

	if (vectored) {
		/* Here len counts iovec entries, not bytes. */
#ifdef CONFIG_COMPAT
		if (compat)
			return compat_import_iovec(rw, buf, len, UIO_FASTIOV,
						   iovec, iter);
#endif
		return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
	}

	/* Single buffer: len is a byte count. */
	ret = import_single_range(rw, buf, len, *iovec, iter);
	*iovec = NULL;
	return ret;
}