Think of a very large file, too large for the box to hold the file as well as a copy.
I would like a command that pipes the file into a stream, just like cat does, but frees each block as soon as it has been piped, so that the file disappears incrementally while it is being piped rather than only after the whole transfer has finished.
I have written the following C program, which uses fallocate() with FALLOC_FL_COLLAPSE_RANGE
to remove bytes, in multiples of the block size, from the front of the file. It works OK.
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/ioctl.h>
#include <sys/statfs.h>
#include <sys/stat.h>
#include <linux/fs.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <inttypes.h>
#include <stdbool.h>
/* Runtime configuration; the defaults below are overridden by the command-line
 * options parsed in main(). */

/* -v: when true, CHECK also logs every successful check to stderr. */
static bool cfg_verbose = false;
/* Cleared by -U: when true, main() unlinks each file after streaming it. */
static bool cfg_unlink = true;
/* -S: when true, main() sleeps before each collapse step (testing aid). */
static bool cfg_sleep = false;
/* Sleep length in milliseconds used with -S (multiplied by 1000 for usleep);
 * no option in the visible code changes it. */
static size_t cfg_sleep_ms = 1000;
/* -b: upper bound in bytes for the copy buffer; defaults to 4 MiB. */
static size_t cfg_maxbuf = 4 * 1024 * 1024;
/* Program name used as the prefix of diagnostics and in the usage text;
 * main() replaces it with argv[0] when that is non-NULL. */
static const char *cfg_prog = "removingcat";
/*
 * CHECK(cond, fmt, ...) - evaluate cond exactly once and abort on failure.
 *
 * errno is cleared before cond is evaluated, so whatever errno holds after a
 * failing cond was set by cond itself.  That value is snapshotted immediately:
 * the fflush(stdout) that follows may itself fail and overwrite errno, which
 * would otherwise make the diagnostic report the wrong error.
 *
 * On failure: prints "<prog>: <line>: (<cond>) failed errno=... <fmt...>" to
 * stderr and calls exit(EXIT_FAILURE).
 * On success: prints an "OK" line to stderr only when cfg_verbose is set.
 *
 * Note: because errno is zeroed first, a condition such as `errno == 0` is
 * always true here; callers must capture errno before invoking CHECK.
 */
#define CHECK(cond, fmt, ...) \
    do { \
        errno = 0; \
        if (!(cond)) { \
            const int check_errno_ = errno; \
            fflush(stdout); \
            fprintf(stderr, "%s: %3d: (%s) failed errno=%d (%s) " fmt "\n", cfg_prog, __LINE__, #cond, check_errno_, strerror(check_errno_) __VA_OPT__(,) __VA_ARGS__); \
            exit(EXIT_FAILURE); \
        } else { \
            if (cfg_verbose) { \
                fflush(stdout); \
                fprintf(stderr, "%s: %3d: (%s) " fmt " OK \n", cfg_prog, __LINE__, #cond __VA_OPT__(,) __VA_ARGS__); \
            } \
        } \
    } while(0)
/*
 * Parse nptr as a non-negative integer and return it.
 *
 * The base is auto-detected by strtoull (base argument 0), so decimal,
 * 0x-prefixed hexadecimal and 0-prefixed octal are accepted.
 *
 * The process exits through CHECK when:
 *   - strtoull reports an error through errno (e.g. the value is out of range),
 *   - no characters were consumed (empty or non-numeric input),
 *   - trailing characters follow the number,
 *   - the value does not fit the long long return type (this also rejects
 *     negative input, which strtoull wraps to a huge unsigned value).
 */
static long long check_strtoull(const char *nptr) {
    char *endptr;
    errno = 0;
    const unsigned long long parsed = strtoull(nptr, &endptr, 0);
    /* CHECK zeroes errno before evaluating its condition, so a plain
     * `errno == 0` test could never fail.  Capture strtoull's errno first and
     * assign it back inside the condition so the failure message reports it. */
    const int parse_errno = errno;
    CHECK((errno = parse_errno) == 0, "strtoull %s", nptr);
    CHECK(endptr != nptr, "strtoull %s", nptr);
    CHECK(*endptr == '\0', "strtoull %s", nptr);
    CHECK(parsed <= LLONG_MAX, "strtoull %s", nptr);
    return (long long)parsed;
}
/*
 * Print the command-line help text to stdout.
 *
 * argv is kept in the signature for existing callers but is not read; the
 * program name shown in the synopsis comes from cfg_prog.
 */
void usage(char *argv[]) {
    (void)argv;
    printf(
        "Usage: %s [-SUhv] [-b size] file...\n"
        "\n"
        "cat the file while simultaneously removing it with fallocate FALLOC_FL_COLLAPSE_RANGE and unlink\n"
        "\n"
        "Options:\n"
        "  -S       add additional sleep between reads for testing\n"
        "  -U       do not unlink the file\n"
        "  -b size  set max buffer size in bytes, default: 4M\n"
        "  -h       display this help and exit\n"
        "  -v       be verbose\n"
        "\n"
        ,
        cfg_prog
    );
}
/*
 * Write exactly len bytes from buf to fd, retrying after short writes and
 * after EINTR.  Returns true on success; on failure returns false and leaves
 * errno as set by the failing write().
 */
static bool write_all(int fd, const char *buf, size_t len) {
    while (len > 0) {
        const ssize_t n = write(fd, buf, len);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }
        buf += n;
        len -= (size_t)n;
    }
    return true;
}

/*
 * Stream every file named on the command line to stdout while freeing its
 * storage as the data is emitted.
 *
 * For each file: data is read from the front in chunks of bufsize (the largest
 * multiple of the file's st_blksize that does not exceed cfg_maxbuf), written
 * to stdout, and then removed from the front of the file with
 * fallocate(FALLOC_FL_COLLAPSE_RANGE).  The final partial chunk is released
 * with ftruncate(), and the file is unlinked unless -U was given.
 *
 * Returns 0 on success or after -h, 1 on an unknown option; any failing
 * system call terminates the process through CHECK.
 */
int main(int argc, char *argv[]) {
    cfg_prog = argv[0] ? argv[0] : cfg_prog;
    int opt;
    while ((opt = getopt(argc, argv, "SUb:hv")) != -1) {
        switch (opt) {
        case 'S': cfg_sleep = true; break;
        case 'U': cfg_unlink = false; break;
        case 'b': cfg_maxbuf = check_strtoull(optarg); break;
        case 'h': usage(argv); return 0;
        case 'v': cfg_verbose = true; break;
        default: return 1;
        }
    }
    CHECK(optind < argc, "no file specified");

    char *buf = NULL;
    for (int i = optind; i < argc; i++) {
        int fd;
        /* 0 is a valid descriptor (e.g. when stdin was closed), so only a
         * negative result means failure. */
        CHECK((fd = open(argv[i], O_RDWR)) >= 0, "open %s", argv[i]);

        struct stat st;
        CHECK(fstat(fd, &st) == 0, "fstat %s", argv[i]);
        const intmax_t blocksize = st.st_blksize;
        CHECK(blocksize > 0, "blocksize=%jd", blocksize);
        CHECK((uintmax_t)blocksize <= cfg_maxbuf, "blocksize=%jd > cfg_maxbuf=%zu", blocksize, cfg_maxbuf);

        /* Largest multiple of blocksize that fits in cfg_maxbuf; at least one
         * block because of the check above.  The collapse length must be a
         * multiple of the block size. */
        const size_t bufsize = cfg_maxbuf - cfg_maxbuf % (size_t)blocksize;
        /* read() counts and fallocate() lengths are signed. */
        CHECK(bufsize <= (size_t)SSIZE_MAX, "bufsize=%zu too large", bufsize);
        CHECK((buf = realloc(buf, bufsize)) != NULL, "realloc(%zu)", bufsize);

        /* Bytes at the front of the file that were already written to stdout
         * but not yet collapsed away; equals the current file offset. */
        size_t pending = 0;
        for (;;) {
            ssize_t nread;
            /* Performing the read inside CHECK keeps read()'s errno for the
             * diagnostic (CHECK clears errno before evaluating). */
            CHECK((nread = read(fd, buf, bufsize)) >= 0, "read %s", argv[i]);
            if (nread == 0) {
                break;
            }
            CHECK(write_all(STDOUT_FILENO, buf, (size_t)nread), "write %zd to stdout", nread);
            pending += (size_t)nread;

            while (pending >= bufsize) {
                struct stat cur;
                CHECK(fstat(fd, &cur) == 0, "fstat %s", argv[i]);
                /* A collapse range that reaches or passes EOF is rejected by
                 * the kernel; leave the tail for the ftruncate() below. */
                if (cur.st_size <= (off_t)bufsize) {
                    break;
                }
                if (cfg_sleep) {
                    usleep(cfg_sleep_ms * 1000);
                }
                CHECK(fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, (off_t)bufsize) == 0, "fallocate FALLOC_FL_COLLAPSE_RANGE on %s", argv[i]);
                pending -= bufsize;
                /* Resume right after the bytes that were already emitted;
                 * seeking to 0 would output them a second time after a short
                 * read. */
                CHECK(lseek(fd, (off_t)pending, SEEK_SET) == (off_t)pending, "lseek %s", argv[i]);
            }
        }

        /* Release whatever is left (less than or exactly one chunk). */
        CHECK(ftruncate(fd, 0) == 0, "ftruncate %s", argv[i]);
        if (cfg_unlink) {
            CHECK(unlink(argv[i]) == 0, "unlink %s", argv[i]);
        }
        CHECK(close(fd) == 0, "close %s", argv[i]);
    }
    free(buf);
    return EXIT_SUCCESS;
}