I'm looking for a comprehensible explanation or an implementation of PyTorch's Conv1d layer in C++.
Let's define the conv1d layer as:
BATCH_SIZE = 20
INPUT_SIZE = 160
OUTPUT_SIZE = 64
KERNEL_SIZE = INPUT_SIZE // BATCH_SIZE  # 8

l = torch.nn.Conv1d(BATCH_SIZE, OUTPUT_SIZE,
                    kernel_size=KERNEL_SIZE,
                    bias=False, dtype=torch.float64)

# and the input
inp = some_lin_x_160.view(BATCH_SIZE, -1)  # shape (20, 8)
l(inp)
All sources I found specify the output length as (L_in - kernel_size)/stride + 1 (with no padding or dilation), but that formula only covers the length dimension; I'm not entirely clear on how PyTorch handles an arbitrary (client-defined) out_channels.
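To make the shape question concrete, here is a small sketch using the LibTorch C++ API to inspect the shapes involved (assuming LibTorch is available; an explicit batch dimension is added for clarity):

#include <torch/torch.h>
#include <iostream>

int main() {
    // Mirror of the Python snippet above; note that BATCH_SIZE really
    // plays the role of in_channels for this Conv1d.
    auto conv = torch::nn::Conv1d(
        torch::nn::Conv1dOptions(/*in_channels=*/20, /*out_channels=*/64,
                                 /*kernel_size=*/8).bias(false));
    conv->to(torch::kFloat64);

    auto inp = torch::randn({1, 20, 8}, torch::kFloat64);  // (batch, in_channels, length)
    auto out = conv->forward(inp);

    std::cout << conv->weight.sizes() << '\n';  // [64, 20, 8]: one (20, 8) filter per output channel
    std::cout << out.sizes() << '\n';           // [1, 64, 1]: length (8 - 8)/1 + 1 = 1
}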
Below is my very naive implementation of the layer based on the Eigen library, and I'm pretty sure it's wrong:
#include <Eigen/Dense>
#include <unsupported/Eigen/CXX11/Tensor>

typedef Eigen::Matrix<double, 1, 160> Input_t;
typedef Eigen::TensorFixedSize<double, Eigen::Sizes<64, 20, 8>> Conv1d_t;

Eigen::Matrix<double, 1, 64> conv1d(
        const Input_t& raw_input,
        const Conv1d_t& weights) noexcept {
    auto input = Eigen::Matrix<double, 20, 8>(raw_input.data());
    auto output = Eigen::Matrix<double, 64, 1>();
    output.setZero();
    for (int out_ch = 0; out_ch < 64; out_ch++) {
        for (int bs = 0; bs < 20; bs++)
            for (int k = 0; k < 8; k++)
                output(out_ch, 0) += input(bs, k) * weights(out_ch, bs, k);
    }
    return output.transpose();
}
I'd really appreciate any help with this question.
Thanks to @Kari for clarifying in the comments. In this particular case the problem was the default StorageOptions in Eigen: a plain Eigen::Matrix is column-major by default, while PyTorch tensors are C-contiguous (row-major), so constructing the 20x8 input matrix straight from the raw pointer scrambles the data.
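A tiny demonstration of the pitfall, with made-up data, just to show the two readings of the same buffer:

#include <Eigen/Dense>
#include <iostream>

int main() {
    double data[6] = {0, 1, 2, 3, 4, 5};  // a C-contiguous buffer, as PyTorch stores it
    // Default Eigen interpretation: column-major, columns are filled first
    Eigen::Map<const Eigen::Matrix<double, 2, 3>> col_major(data);
    // Row-major interpretation, matching PyTorch's layout
    Eigen::Map<const Eigen::Matrix<double, 2, 3, Eigen::RowMajor>> row_major(data);
    std::cout << col_major << "\n\n";  // 0 2 4
                                       // 1 3 5
    std::cout << row_major << "\n";    // 0 1 2
                                       // 3 4 5
}

With a row-major map, the fixed version of the implementation sketch looks like this: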
constexpr static int input_channels = 160;
constexpr static int output_channels = 64;
constexpr static int batch_number = 20;
constexpr static int kernel_size = input_channels / batch_number;  // 8, as in the Python snippet

typedef Eigen::Matrix<double, 1, input_channels> Input_t;
// Same index order as PyTorch's Conv1d weight: (out_channels, in_channels, kernel_size)
typedef Eigen::TensorFixedSize<double, Eigen::Sizes<output_channels, batch_number, kernel_size>> Conv1d_t;
auto conv1d(
        const Input_t& raw_input,
        const Conv1d_t& weights) noexcept {
    constexpr auto batch_size = input_channels / batch_number;  // per-channel length (8)
    constexpr auto seq = batch_size - kernel_size + 1;          // output length, stride 1
    // Reinterpret the flat input as (channels, length) in row-major order,
    // matching PyTorch's layout; mapInto is sketched below.
    auto input = mapInto<batch_number, batch_size, Eigen::RowMajor>(raw_input);
    auto output = Eigen::Matrix<double, output_channels, seq>();
    output.setZero();
    for (int out_ch = 0; out_ch < output_channels; out_ch++)
        for (int s = 0; s < seq; s++)
            for (int bs = 0; bs < batch_number; bs++)
                for (int k = 0; k < kernel_size; k++)
                    output(out_ch, s) += input(bs, k + s) * weights(out_ch, bs, k);
    // eval() materializes a copy: returning transpose() of a local through
    // an auto return type would leave a dangling expression.
    return output.transpose().eval();
}
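mapInto is not an Eigen built-in but a small helper of mine; a minimal sketch of one possible definition using Eigen::Map (the exact signature here is illustrative):

// Hypothetical helper: reinterpret a flat 1 x (Rows*Cols) row vector as a
// Rows x Cols matrix without copying, with an explicit storage order.
template <int Rows, int Cols, int Options>
auto mapInto(const Eigen::Matrix<double, 1, Rows * Cols>& vec) noexcept {
    return Eigen::Map<const Eigen::Matrix<double, Rows, Cols, Options>>(vec.data());
}

The Map avoids a copy; constructing a row-major matrix by value instead would copy the data but would be equally correct.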