c++linuxmemory-managementboostmemory-pool

Increasing allocation performance for strings


I ported a Java GC test program to C++ (see the code below) as well as Python. The Java and Python performance is much greater than C++ and I was thinking this was due to all the calls to new that have to be done to create the strings each time. I've tried using Boost's fast_pool_allocator but that actually worsened performance from 700ms to 1200ms. Am I using the allocator wrong, or is there something else I should be doing?

EDIT: Compiled with g++ -O3 -march=native --std=c++11 garbage.cpp -lboost_system. g++ is version 4.8.1 One iteration takes in Python is about 300ms and with Java about 50ms. std::allocator gives about 700ms and boost::fast_pool_allocator gives about 1200ms.

#include <string>
#include <vector>
#include <chrono>
#include <list>
#include <iostream>
#include <boost/pool/pool_alloc.hpp>
#include <memory>
//#include <gc/gc_allocator.h>


using namespace std;
#include <sstream>
typedef boost::fast_pool_allocator<char> c_allocator;
//typedef std::allocator<char> c_allocator;
typedef basic_string<char, char_traits<char>, c_allocator> pool_string;
namespace patch {
    template <typename T> pool_string to_string(const T& in) {
        std::basic_stringstream<char, char_traits<char>, c_allocator> stm;
        stm << in;
        return stm.str();
    }
}


#include "mytime.hpp"

class Garbage {
public:
    vector<pool_string> outer;
    vector<pool_string> old;
    const int nThreads = 1;
    //static auto time = chrono::high_resolution_clock();

    void go() {
//        outer.resize(1000000);
        //old.reserve(1000000);
        auto tt = mytime::msecs();
        for (int i = 0; i < 10; ++i) {
            if (i % 100 == 0) {
                cout << "DOING AN OLD" << endl;
                doOld();
                tt = mytime::msecs();
            }

            for (int j = 0; j < 1000000/nThreads; ++j)
                outer.push_back(patch::to_string(j));

            outer.clear();
            auto t = mytime::msecs();
            cout << (t - tt) << endl;
            tt = t;
        }
    }

    void doOld() {
        old.clear();
        for (int i = 0; i < 1000000/nThreads; ++i)
            old.push_back(patch::to_string(i));
    }
};

int main() {
    Garbage().go();
}

Solution

  • The problem is you're using a new string stream each time to convert an integer.

    Fix it:

    namespace patch {
        template <typename T> pool_string to_string(const T& in) {
            return boost::lexical_cast<pool_string>(in);
        }
    }
    

    Now the timings are:

    DOING AN OLD
    0.175462
    0.0670085
    0.0669926
    0.0687969
    0.0692518
    0.0669318
    0.0669196
    0.0669187
    0.0668962
    0.0669185
    
    real    0m0.801s
    user    0m0.784s
    sys 0m0.016s
    

    See it Live On Coliru

    Full code for reference:

    #include <boost/pool/pool_alloc.hpp>
    #include <chrono>
    #include <iostream>
    #include <list>
    #include <memory>
    #include <sstream>
    #include <string>
    #include <vector>
    #include <boost/lexical_cast.hpp>
    //#include <gc/gc_allocator.h>
    
    using string = std::string;
    
    namespace patch {
        template <typename T> string to_string(const T& in) {
            return boost::lexical_cast<string>(in);
        }
    }
    
    class Timer
    {
        typedef std::chrono::high_resolution_clock clock;
        clock::time_point _start;
      public:
        Timer() { reset(); }
        void reset() { _start = now(); }
        double elapsed()
        {
            using namespace std::chrono;
            auto e = now() - _start;
            return duration_cast<nanoseconds>(e).count()*1.0e-9;
        }
        clock::time_point now()
        {
            return clock::now();
        }
    };
    
    
    class Garbage {
        public:
            std::vector<string> outer;
            std::vector<string> old;
            const int nThreads = 1;
    
            void go() {
                outer.resize(1000000);
                //old.reserve(1000000);
                Timer timer;
    
                for (int i = 0; i < 10; ++i) {
                    if (i % 100 == 0) {
                        std::cout << "DOING AN OLD" << std::endl;
                        doOld();
                    }
    
                    for (int j = 0; j < 1000000/nThreads; ++j)
                        outer.push_back(patch::to_string(j));
    
                    outer.clear();
                    std::cout << timer.elapsed() << std::endl;
                    timer.reset();
                }
            }
    
            void doOld() {
                old.clear();
                for (int i = 0; i < 1000000/nThreads; ++i)
                    old.push_back(patch::to_string(i));
            }
    };
    
    int main() {
        Garbage().go();
    }