c++performancerecursionsha256merkle-tree

How to improve the speed of merkle root calculation in C++?


I am trying to optimise the merkle root calculation as much as possible. So far, I implemented it in Python which resulted in this question and the suggestion to rewrite it in C++.

#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <streambuf>
#include <sstream>

#include <openssl/evp.h>
#include <openssl/sha.h>
#include <openssl/crypto.h>



std::vector<unsigned char> double_sha256(std::vector<unsigned char> a, std::vector<unsigned char> b)
{
    unsigned char inp[64];
    int j=0;
    for (int i=0; i<32; i++)
    {
        inp[j] = a[i];
        j++;
    }
    for (int i=0; i<32; i++)
    {
        inp[j] = b[i];
        j++;
    }

    const EVP_MD *md_algo = EVP_sha256();
    unsigned int md_len = EVP_MD_size(md_algo);
    std::vector<unsigned char> out( md_len );
    EVP_Digest(inp, 64, out.data(), &md_len, md_algo, nullptr);
    EVP_Digest(out.data(), md_len, out.data(), &md_len, md_algo, nullptr);
    return out;
}

std::vector<std::vector<unsigned char> > calculate_merkle_root(std::vector<std::vector<unsigned char> > inp_list)
{
   std::vector<std::vector<unsigned char> > out;
   int len = inp_list.size();
   if (len == 1)
   {
        out.push_back(inp_list[0]);
        return out;
   }
   for (int i=0; i<len-1; i+=2)
   {
        out.push_back(
            double_sha256(inp_list[i], inp_list[i+1])
        );
   }
   if (len % 2 == 1)
   {
        out.push_back(
            double_sha256(inp_list[len-1], inp_list[len-1])
        );
   }
   return calculate_merkle_root(out);
}



int main()
{
    std::ifstream infile("txids.txt");

    std::vector<std::vector<unsigned char> > txids;
    std::string line;
    int count = 0;
    while (std::getline(infile, line))
    {
        unsigned char* buf = OPENSSL_hexstr2buf(line.c_str(), nullptr);
        std::vector<unsigned char> buf2;
        for (int i=31; i>=0; i--)
        {
            buf2.push_back(
                buf[i]
            );
        }
        txids.push_back(
            buf2
        );
        count++;
    }
    infile.close();
    std::cout << count << std::endl;

    std::vector<std::vector<unsigned char> > merkle_root_hash;
    for (int k=0; k<1000; k++)
    {
        merkle_root_hash = calculate_merkle_root(txids);
    }
    std::vector<unsigned char> out0 = merkle_root_hash[0];
    std::vector<unsigned char> out;
    for (int i=31; i>=0; i--)
    {
        out.push_back(
            out0[i]
        );
    }

    static const char alpha[] = "0123456789abcdef";
    for (int i=0; i<32; i++)
    {
        unsigned char c = out[i];
        std::cout << alpha[ (c >> 4) & 0xF];
        std::cout << alpha[ c & 0xF];
    }
    std::cout.put('\n');

    return 0;
}

However, the performance is worse compared to the Python implementation (~4s):

$ g++ test.cpp -L/usr/local/opt/openssl/lib -I/usr/local/opt/openssl/include -lcrypto
$ time ./a.out 
1452
289792577c66cd75f5b1f961e50bd8ce6f36adfc4c087dc1584f573df49bd32e

real      0m9.245s
user      0m9.235s
sys       0m0.008s

The complete implementation and the input file are available here: test.cpp and txids.txt.

How can I improve the performance? Are the compiler optimizations enabled by default? Are there faster sha256 libraries than openssl available?


Solution

  • There are plenty of things you can do to optimize the code.

    Here is the list of the important points:

    Here is the resulting code:

    #include <iostream>
    #include <vector>
    #include <string>
    #include <fstream>
    #include <streambuf>
    #include <sstream>
    #include <cstring>
    #include <array>
    #include <algorithm>
    #include <cassert>
    
    #include <openssl/evp.h>
    #include <openssl/sha.h>
    #include <openssl/crypto.h>
    
    using Hash = std::array<unsigned char, 32>;
    
    Hash double_sha256(const Hash& a, const Hash& b)
    {
        assert(a.size() == 32 && b.size() == 32);
    
        unsigned char inp[64];
        std::copy(a.begin(), a.end(), inp);
        std::copy(b.begin(), b.end(), inp+32);
    
        const EVP_MD *md_algo = EVP_sha256();
        assert(EVP_MD_size(md_algo) == 32);
    
        unsigned int md_len = 32;
        Hash out;
        EVP_Digest(inp, 64, out.data(), &md_len, md_algo, nullptr);
        EVP_Digest(out.data(), md_len, out.data(), &md_len, md_algo, nullptr);
        return out;
    }
    
    std::vector<Hash> calculate_merkle_root(const std::vector<Hash>& inp_list)
    {
       std::vector<Hash> out;
       int len = inp_list.size();
       out.reserve(len/2+2);
       if (len == 1)
       {
            out.push_back(inp_list[0]);
            return out;
       }
       for (int i=0; i<len-1; i+=2)
       {
            out.push_back(double_sha256(inp_list[i], inp_list[i+1]));
       }
       if (len % 2 == 1)
       {
            out.push_back(double_sha256(inp_list[len-1], inp_list[len-1]));
       }
       return calculate_merkle_root(out);
    }
    
    int main()
    {
        std::ifstream infile("txids.txt");
    
        std::vector<Hash> txids;
        std::string line;
        while (std::getline(infile, line))
        {
            unsigned char* buf = OPENSSL_hexstr2buf(line.c_str(), nullptr);
            Hash buf2;
            std::copy(buf, buf+32, buf2.begin());
            std::reverse(buf2.begin(), buf2.end());
            txids.push_back(buf2);
            OPENSSL_free(buf);
        }
        infile.close();
        std::cout << txids.size() << std::endl;
    
        std::vector<Hash> merkle_root_hash;
        for (int k=0; k<1000; k++)
        {
            merkle_root_hash = calculate_merkle_root(txids);
        }
        Hash out0 = merkle_root_hash[0];
        Hash out = out0;
        std::reverse(out.begin(), out.end());
    
        static const char alpha[] = "0123456789abcdef";
        for (int i=0; i<32; i++)
        {
            unsigned char c = out[i];
            std::cout << alpha[ (c >> 4) & 0xF];
            std::cout << alpha[ c & 0xF];
        }
        std::cout.put('\n');
    
        return 0;
    }
    

    On my machine, this code is 3 times faster than the initial version and 2 times faster than the Python implementation.

    This implementation spends >98% of its time in EVP_Digest. As a result, if you want a faster code, you could try to find a faster hashing library although OpenSSL should be already pretty fast. The current code already succeed to compute 1.7 millions hashes per second in sequential on a mainstream CPU. This is quite good. Alternatively, you can also parallelize the program using OpenMP (this is roughly 5 times faster on my 6 core machine).