Tags: c++, eigen, mini-batch

The optimal way to split Eigen MatrixXd into fixed-size batches with randomly shuffled rows


I have input and target data represented as MatrixXd (N x M) and VectorXd (N). The goal is to create mini-batches of size K, each consisting of a subset of the input and target data shuffled in the same way. The ML model will then process these mini-batches in a loop. Could you recommend how to achieve this with as little copying as possible (maybe with a code example)?

My attempt at implementing this kind of batching:

#include <algorithm>
#include <cmath>
#include <numeric>
#include <random>
#include <vector>

#include <Eigen/Dense>

using Eigen::MatrixXd;
using Eigen::Ref;
using Eigen::VectorXd;

struct Batch {
    const Ref<const MatrixXd> input;
    const Ref<const VectorXd> target;
};

std::vector<Batch> generate_batches(const Ref<const MatrixXd> input, const Ref<const VectorXd> target, unsigned batch_size)
{
    unsigned num_samples = input.rows();
    unsigned num_batches = std::ceil(num_samples / (float)batch_size);

    static std::default_random_engine engine;
    std::vector<unsigned> idxs(num_samples);
    std::iota(idxs.begin(), idxs.end(), 0);
    std::shuffle(idxs.begin(), idxs.end(), engine);

    std::vector<Batch> batches;
    batches.reserve(num_batches);

    auto idxs_begin = idxs.begin(); // a plain iterator suffices; moving unsigned ints gains nothing
    for (unsigned idx = 0; idx < num_batches; ++idx) {
        int start = idx * batch_size;
        int end = std::min(start + batch_size, num_samples);

        std::vector<unsigned> batch_idxs(std::next(idxs_begin, start), std::next(idxs_begin, end));
        batches.push_back({ input(batch_idxs, Eigen::all), target(batch_idxs) });
    }
    return batches;
}

Solution

  • Eigen comes with a Transpositions type that does just that. It works in-place by swapping rows or columns. So you can just keep shuffling the same matrix over and over again.

    #include <Eigen/Dense>
    
    #include <algorithm>
    // using std::min
    #include <cassert>
    #include <random>
    // using std::default_random_engine, std::uniform_int_distribution
    
    
    void shuffle_apply(Eigen::Ref<Eigen::MatrixXd> mat,
                       Eigen::Ref<Eigen::VectorXd> vec,
                       int generations, int batchsize)
    {
      // colwise is faster than rowwise
      const Eigen::Index size = mat.cols();
      assert(vec.size() == size);
      using Transpositions = Eigen::Transpositions<
        Eigen::Dynamic, Eigen::Dynamic, Eigen::Index>;
      Transpositions transp(size);
      Eigen::Index* transp_indices = transp.indices().data();
      std::default_random_engine rng; // seed appropriately!
      for(int gen = 0; gen < generations; ++gen) {
        for(Eigen::Index i = 0; i < size; ++i) {
          std::uniform_int_distribution<Eigen::Index> distr(i, size - 1);
          transp_indices[i] = distr(rng);
        }
        mat = mat * transp; // operates in-place
        vec = transp * vec; // transp on left side to shuffle rows, not cols
        for(Eigen::Index start = 0; start < size; start += batchsize) {
          const Eigen::Index curbatch = std::min<Eigen::Index>(
                batchsize, size - start);
          // mat_batch and vec_batch are lightweight views into the shuffled
          // data; hand them to the model here, no copies are made
          const auto mat_batch = mat.middleCols(start, curbatch);
          const auto vec_batch = vec.segment(start, curbatch);
        }
      }
    }
    

    See also Permute Columns of Matrix in Eigen and similar questions.

    EDIT: An older version of this initialized the indices via std::shuffle, which I think is wrong: Transpositions applies its swaps one after another, so the i-th index has to be drawn uniformly from [i, size - 1] (the Fisher-Yates scheme used above), and assigning an already-shuffled permutation to the indices does not produce uniformly distributed shuffles.
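
    For reference, here is the initialization the loop above uses, packaged as a standalone helper (the helper name is made up for illustration, not part of Eigen). Drawing indices[i] uniformly from [i, n - 1] is exactly the Fisher-Yates shuffle, which is why it yields uniformly distributed permutations.

    #include <Eigen/Dense>
    #include <random>
    
    template<class RandomNumberEngine>
    void fill_random_transpositions(
        Eigen::Transpositions<Eigen::Dynamic, Eigen::Dynamic, Eigen::Index>& transp,
        RandomNumberEngine& rng)
    {
      Eigen::Index* indices = transp.indices().data();
      const Eigen::Index n = transp.size();
      for(Eigen::Index i = 0; i < n; ++i) {
        // Fisher-Yates draw: the i-th swap partner comes from [i, n - 1]
        std::uniform_int_distribution<Eigen::Index> distr(i, n - 1);
        indices[i] = distr(rng);
      }
    }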

    Here is a second version that may offer a more palatable interface. In particular, the original matrix and vector can be restored without taking a copy.

    #include <Eigen/Dense>
    
    #include <algorithm>
    // using std::min
    #include <cassert>
    #include <random>
    // using std::default_random_engine, std::uniform_int_distribution
    #include <utility>
    // using std::move
    
    class BatchShuffle
    {
      using Transpositions = Eigen::Transpositions<
        Eigen::Dynamic, Eigen::Dynamic, Eigen::Index>;
      using Permutations = Eigen::PermutationMatrix<
        Eigen::Dynamic, Eigen::Dynamic, Eigen::Index>;
    
      Eigen::MatrixXd mat_;
      Eigen::VectorXd vec_;
      Transpositions cur_transp;
      Permutations aggregated_permut;
    public:
      BatchShuffle(Eigen::MatrixXd mat, Eigen::VectorXd vec)
        : mat_(std::move(mat)),
          vec_(std::move(vec)),
          cur_transp(this->mat_.cols()),
          aggregated_permut(this->mat_.cols())
      {
        assert(this->vec_.size() == this->mat_.cols());
        aggregated_permut.setIdentity();
      }
      Eigen::Index totalsize() const noexcept
      { return mat_.cols(); }
    
      const Eigen::MatrixXd& mat() const noexcept
      { return mat_; }
    
      const Eigen::VectorXd& vec() const noexcept
      { return vec_; }
      
      template<class RandomNumberEngine>
      void shuffle(RandomNumberEngine& rng)
      {
        Eigen::Index* indices = cur_transp.indices().data();
        for(Eigen::Index i = 0, n = totalsize(); i < n; ++i) {
          std::uniform_int_distribution<Eigen::Index> distr(i, n - 1);
          indices[i] = distr(rng);
        }
        Permutations::IndicesType& aggregated = aggregated_permut.indices();
        aggregated = cur_transp * aggregated;
        mat_ = mat_ * cur_transp;
        vec_ = cur_transp * vec_;
      }
      void restore_original()
      {
        const auto& inverse = aggregated_permut.inverse().eval();
        mat_ = mat_ * inverse;
        vec_ = inverse * vec_;
        aggregated_permut.setIdentity();
      }
    };
    
    // The model hook: declared here, to be defined by the user; it consumes one mini-batch.
    void apply(const Eigen::Ref<const Eigen::MatrixXd>& mat,
               const Eigen::Ref<const Eigen::VectorXd>& vec);
    
    int main()
    {
      int rows = 1000, cols = 10000, batchsize = 100;
      BatchShuffle batch(Eigen::MatrixXd::Random(rows, cols),
                         Eigen::VectorXd::Random(cols));
      std::default_random_engine rng;
      for(int i = 0; i < 100; ++i) {
        batch.shuffle(rng);
        for(Eigen::Index j = 0; j < batch.totalsize(); j += batchsize) {
          Eigen::Index cursize =
            std::min<Eigen::Index>(batchsize, batch.totalsize() - j);
          apply(batch.mat().middleCols(j, cursize),
                batch.vec().segment(j, cursize));
        }
      }
      batch.restore_original();
    }
    

    Again, I chose to use the matrix column-wise, unlike your attempt, which slices rows. Eigen stores its matrices in column-major order (a.k.a. Fortran order), so taking row slices rather than column slices will significantly slow down pretty much everything you do with the data. I really urge you to transpose your input generation and matrix use accordingly, if at all possible; a minimal sketch of that layout follows below.
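
    By way of illustration, a rough sketch of that transposed, column-wise layout (consume_batch and all other names here are made up, not part of the answer above; consume_batch is only declared, like apply earlier). With one sample per column, each mini-batch is a contiguous block of memory, so middleCols() and segment() hand out views without any copying.

    #include <algorithm>
    // using std::min
    #include <Eigen/Dense>
    
    // Placeholder for the model; to be defined by the user.
    void consume_batch(const Eigen::Ref<const Eigen::MatrixXd>& inputs,
                       const Eigen::Ref<const Eigen::VectorXd>& targets);
    
    int main()
    {
      const Eigen::Index num_features = 10, num_samples = 1000, batchsize = 100;
      // features x samples: one sample per column, matching Eigen's column-major storage
      Eigen::MatrixXd inputs = Eigen::MatrixXd::Random(num_features, num_samples);
      Eigen::VectorXd targets = Eigen::VectorXd::Random(num_samples);
    
      for(Eigen::Index start = 0; start < num_samples; start += batchsize) {
        const Eigen::Index cur = std::min(batchsize, num_samples - start);
        // both arguments are views into contiguous memory; nothing is copied
        consume_batch(inputs.middleCols(start, cur), targets.segment(start, cur));
      }
    }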