unix

With linux/bash, is there a command that converts a file into a stream, deleting the file on the fly?


Think of a very large file, too large for the box to hold the file as well as a copy.

I would like a command that pipes the file into a stream, just like cat does, but freeing all blocks that have been piped, so the file is gone not only afterwards, but incrementally while the piping is done.


Solution

  • I have written the following C program, which uses fallocate() with FALLOC_FL_COLLAPSE_RANGE to remove bytes from the front of the file, in multiples of the block size, as they are streamed to stdout. It works OK.

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <getopt.h>
    #include <sys/ioctl.h>
    #include <sys/statfs.h>
    #include <sys/stat.h>
    #include <linux/fs.h>
    #include <errno.h>
    #include <string.h>
    #include <stdlib.h>
    #include <limits.h>
    #include <inttypes.h>
    #include <stdbool.h>
    
    /* Runtime configuration; the option letters refer to the getopt string in main(). */
    /* -v: when true, CHECK also reports every condition that succeeded. */
    static bool cfg_verbose = false;
    /* -U clears this; when true, each file is unlinked after it has been streamed. */
    static bool cfg_unlink = true;
    /* -S: when true, main() sleeps before each collapse step (intended for testing). */
    static bool cfg_sleep = false;
    /* Sleep length in milliseconds; multiplied by 1000 and handed to usleep(). No visible option changes it. */
    static size_t cfg_sleep_ms = 1000;
    /* -b: upper bound, in bytes, for the read buffer; defaults to 4 MiB. */
    static size_t cfg_maxbuf = 4 * 1024 * 1024;
    /* Name used as the prefix of diagnostics and in the help text; main() replaces it with argv[0] when that is non-NULL. */
    static const char *cfg_prog = "removingcat";
    
    /*
     * CHECK(cond, fmt, ...): guard for operations that must succeed.
     *
     * errno is cleared first, then cond is evaluated exactly once.
     *   - cond true:  nothing happens unless cfg_verbose is set, in which case an
     *                 "OK" line is written to stderr.
     *   - cond false: a diagnostic containing the source line, the text of cond,
     *                 errno and the formatted message is written to stderr and the
     *                 process exits with EXIT_FAILURE.
     *
     * stdout is flushed before anything is written to stderr. Because errno is
     * reset before cond runs, cond has to perform the failing call itself for the
     * reported errno to be meaningful; a condition that merely inspects an errno
     * set earlier will always see 0.
     */
    #define CHECK(cond, fmt, ...) \
      do { \
        errno = 0; \
        if ((cond)) { \
          if (cfg_verbose) { \
            fflush(stdout); \
            fprintf(stderr, "%s: %3d: (%s) " fmt " OK \n", cfg_prog, __LINE__, #cond __VA_OPT__(,) __VA_ARGS__); \
          } \
        } else { \
          fflush(stdout); \
          fprintf(stderr, "%s: %3d: (%s) failed errno=%d (%s) " fmt "\n", cfg_prog, __LINE__, #cond, errno, strerror(errno) __VA_OPT__(,) __VA_ARGS__); \
          exit(EXIT_FAILURE); \
        } \
      } while(0)
    
    /*
     * Parse nptr as a non-negative integer (base auto-detected: decimal, 0x hex,
     * leading-0 octal) and return it.
     *
     * The whole string must be consumed and the value must fit in a long long.
     * Any violation (empty input, trailing garbage, overflow, or a negative
     * number, which strtoull would otherwise wrap to a huge value) terminates
     * the program through CHECK.
     */
    static long long check_strtoull(const char *nptr) {
      char *endptr;
      errno = 0;
      const unsigned long long parsed = strtoull(nptr, &endptr, 0);
      const int parse_errno = errno;
      /*
       * CHECK clears errno before evaluating its condition, so testing errno
       * directly would always succeed. Re-install the saved value inside the
       * condition: the test is then real and the diagnostic shows the right errno.
       */
      CHECK((errno = parse_errno) == 0, "strtoull %s", nptr);
      /* No digits consumed at all. */
      CHECK(endptr != nptr, "strtoull %s", nptr);
      /* Trailing characters after the number. */
      CHECK(*endptr == '\0', "strtoull %s", nptr);
      /* Also rejects "-N", which strtoull negates in unsigned arithmetic. */
      CHECK(parsed <= (unsigned long long)LLONG_MAX, "strtoull %s out of range", nptr);
      return (long long)parsed;
    }
    
    /*
     * Print the command-line help text to stdout.
     *
     * argv is kept for interface compatibility but is not consulted; the program
     * name shown in the synopsis comes from cfg_prog.
     */
    void usage(char *argv[]) {
      (void)argv;
      printf(
        "Usage: %s [-SUhv] [-b size] file...\n"
        "\n"
        "cat the file while simultaneously removing it with fallocate FALLOC_FL_COLLAPSE_RANGE and unlink\n"
        "\n"
        "Options:\n"
        "  -S       add additional sleep between reads for testing\n"
        "  -U       do not unlink the file\n"
        "  -b size  set max buffer size in bytes, default: 4M\n"
        "  -h       display this help and exit\n"
        "  -v       be verbose\n"
        "\n",
        cfg_prog
      );
    }
    
    /*
     * Write all len bytes of buf to stdout.
     *
     * write(2) may transfer fewer bytes than requested or be interrupted by a
     * signal; both cases are retried. Returns true once everything has been
     * written, or false with errno describing the failure.
     */
    static bool write_all_stdout(const char *buf, size_t len) {
      while (len > 0) {
        const ssize_t n = write(STDOUT_FILENO, buf, len);
        if (n < 0) {
          if (errno == EINTR) {
            continue;
          }
          return false;
        }
        if (n == 0) {
          /* No progress and no error: bail out instead of spinning forever. */
          errno = EIO;
          return false;
        }
        buf += n;
        len -= (size_t)n;
      }
      return true;
    }
    
    /*
     * Stream every file named on the command line to stdout, releasing its
     * storage from the front while doing so.
     *
     * For each file: data is read in chunks of bufsize (the largest multiple of
     * the file's st_blksize that fits in cfg_maxbuf), written to stdout, and then
     * the same number of bytes is removed from the start of the file with
     * FALLOC_FL_COLLAPSE_RANGE. Whatever is left at the end is truncated away and,
     * unless -U was given, the file is unlinked.
     *
     * Any failing step terminates the program through CHECK. Returns
     * EXIT_SUCCESS when all files were processed (or after -h), EXIT_FAILURE on
     * an unknown option.
     */
    int main(int argc, char *argv[]) {
      cfg_prog = argv[0] ? argv[0] : cfg_prog;
      int opt;
      while ((opt = getopt(argc, argv, "SUb:hv")) != -1) {
        switch (opt) {
          case 'S': cfg_sleep = true; break;
          case 'U': cfg_unlink = false; break;
          case 'b': cfg_maxbuf = (size_t)check_strtoull(optarg); break;
          case 'h': usage(argv); return EXIT_SUCCESS;
          case 'v': cfg_verbose = true; break;
          default: return EXIT_FAILURE;
        }
      }
      CHECK(optind < argc, "no file specified");
      char *buf = NULL;  /* reused (and resized) across files */
      for (int i = optind; i < argc; i++) {
        const char *path = argv[i];
        int fd;
        /* 0 is a valid descriptor (e.g. when stdin is closed); only negative values mean failure. */
        CHECK((fd = open(path, O_RDWR)) >= 0, "open %s", path);
        struct stat st;
        CHECK(fstat(fd, &st) == 0, "fstat %s", path);
        const intmax_t blocksize = st.st_blksize;
        CHECK(blocksize > 0, "blocksize=%jd", blocksize);
        CHECK((uintmax_t)blocksize <= cfg_maxbuf, "blocksize=%jd > cfg_maxbuf=%zu", blocksize, cfg_maxbuf);
        /* Largest multiple of blocksize that does not exceed cfg_maxbuf. */
        const size_t bufsize = cfg_maxbuf - cfg_maxbuf % (size_t)blocksize;
        /* read(2) counts above SSIZE_MAX are not portable. */
        CHECK(bufsize <= (size_t)SSIZE_MAX, "bufsize=%zu exceeds SSIZE_MAX", bufsize);
        char *grown;
        /* The call sits inside the condition so CHECK reports realloc's errno. */
        CHECK((grown = realloc(buf, bufsize)) != NULL, "realloc(%zu)", bufsize);
        buf = grown;
        /* Bytes already written to stdout that still occupy the front of the file. */
        size_t pending = 0;
        for (;;) {
          ssize_t bufread;
          /* Reading inside the condition lets CHECK report the real errno on failure. */
          CHECK((bufread = read(fd, buf, bufsize)) >= 0, "read %s", path);
          if (bufread == 0) {
            break;
          }
          CHECK(write_all_stdout(buf, (size_t)bufread), "write %zd to stdout", bufread);
          pending += (size_t)bufread;
          while (pending >= bufsize) {
            if (cfg_sleep) {
              usleep(cfg_sleep_ms * 1000);
            }
            struct stat cur;
            CHECK(fstat(fd, &cur) == 0, "fstat %s", path);
            if ((uintmax_t)cur.st_size <= bufsize) {
              /*
               * A collapse range that reaches end-of-file is rejected with
               * EINVAL. At this point the whole file has been emitted, so
               * truncating is the equivalent operation.
               */
              CHECK(ftruncate(fd, 0) == 0, "ftruncate %s", path);
              pending = 0;
            } else {
              CHECK(fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, (off_t)bufsize) == 0, "fallocate FALLOC_FL_COLLAPSE_RANGE on %s", path);
              pending -= bufsize;
            }
            /*
             * The remaining data moved down by the collapsed amount. Resume
             * right after the bytes that were already written but not yet
             * removed (non-zero only after a short read), so nothing is
             * emitted twice.
             */
            CHECK(lseek(fd, (off_t)pending, SEEK_SET) == (off_t)pending, "lseek %s", path);
          }
        }
        /* Drop the final partial chunk that was too small to collapse. */
        CHECK(ftruncate(fd, 0) == 0, "ftruncate %s", path);
        if (cfg_unlink) {
          CHECK(unlink(path) == 0, "unlink %s", path);
        }
        CHECK(close(fd) == 0, "close %s", path);
      }
      free(buf);
      return EXIT_SUCCESS;
    }