c++serializationboostboost-dynamic-bitsetboost-bimap

Serialize boost::bimap with boost::dynamic_bitset as key value pair


I would like to serialize a boost::bimap whose keys and values are boost::dynamic_bitset objects, so that I can save it to a file and load it back when needed. I have made an attempt, but I get many compiler errors. The code I have is shown below.

// Example program
#include <iostream>
#include <string>
#include <fstream>
#include <boost/bimap.hpp>
#include <boost/dynamic_bitset.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/serialization/vector.hpp>


namespace std {
    /// Hash support so that boost::dynamic_bitset can be used with
    /// std::hash-based containers. The bitset's blocks are copied into a
    /// scratch vector, and that vector is hashed with boost::hash.
    template <typename Block, typename Alloc>
    struct hash<boost::dynamic_bitset<Block, Alloc> > {

        using bitset_type = boost::dynamic_bitset<Block, Alloc>;
        using block_type = typename bitset_type::block_type;

        size_t operator()(bitset_type const& bs) const
        {
            // One scratch buffer per thread, reused by every call.
            thread_local static std::vector<block_type> scratch;

            // Size the buffer to the bitset, then copy all blocks into it.
            scratch.assign(bs.num_blocks(), 0);
            to_block_range(bs, scratch.begin());

            boost::hash<std::vector<block_type>> hasher;
            return hasher(scratch);
        }
    };
}


    namespace {
        /// Holds one boost::dynamic_bitset<Block> and hands it to a
        /// Boost.Serialization archive as the name/value pair "x".
        template <typename Block>
        struct SerializableType {
            boost::dynamic_bitset<Block> x;

        private:
            // Grants Boost.Serialization access to the private serialize().
            friend class boost::serialization::access;

            template <class Archive>
            void serialize(Archive &archive, const unsigned int) {
                archive & BOOST_SERIALIZATION_NVP(x);
            }
        };
    }

namespace bimaps = boost::bimaps;

// Bitset with the default block type and allocator.
using Bitset = boost::dynamic_bitset<>;

// Bidirectional map between bitsets: the left side is an unordered set,
// the right side an unordered multiset, both hashed with std::hash<Bitset>.
using bimap_reference = boost::bimap<
    bimaps::unordered_set_of<Bitset, std::hash<Bitset>>,
    bimaps::unordered_multiset_of<Bitset, std::hash<Bitset>>>;

// A single (left, right) relation stored in the bimap.
using position = bimap_reference::value_type;

// Global bimap instance.
bimap_reference index;

// Inserts one (key, value) pair of bitsets into the global bimap `index`,
// writes the bimap plus two of its iterators to "binaryfile" through a
// binary_oarchive, then reads them back through a binary_iarchive.
// This is the non-compiling attempt the question asks about.
int main() {
    std::string val = "1010110110101010101";
    std::string key = "10010";

    // Build both bitsets from their string form; `pos_bits` becomes the left
    // element and `bits` the right element of the inserted relation.
    boost::dynamic_bitset<> bits = boost::dynamic_bitset<> (val);
    boost::dynamic_bitset<> pos_bits = boost::dynamic_bitset<> (key);
    index.insert(position(pos_bits, bits));

    // NOTE(review): the stream is opened without std::ios::binary although it
    // backs a binary archive.
    std::ofstream ofs("binaryfile");
    boost::archive::binary_oarchive oa(ofs);
    oa << BOOST_SERIALIZATION_NVP(index);    
    // NOTE(review): `index` is a variable, not a type, so `index::const_iterator`
    // cannot name a type; presumably bimap_reference::const_iterator was meant.
    index::const_iterator iter = index.begin();    


    // first left element of bimap
    boost::dynamic_bitset<> first_left = iter->left;
    const bimap_reference::left_iterator left_iter = index.left.find(first_left);
    // NOTE(review): this streams an iterator into the archive; an iterator only
    // refers to this particular container instance — confirm this is meaningful.
    oa << left_iter;

//  Looks up, in the right view, the value mapped to `first_left`.
    auto pos = index.left.find(first_left);
    const bimap_reference::right_iterator right_iter = index.right.find(pos->second);
    oa << right_iter;
    std::cout <<"# done" << std::endl;
    ofs.close();


    std::cout <<"# Loading binary file ... " << std::endl;
    // NOTE(review): opened without std::ios::binary, same as the output stream.
    std::ifstream ifs("binaryfile"); // name of loading file
    boost::archive::binary_iarchive ia(ifs);
    ia >> index;
    // NOTE(review): `left_iter` and `right_iter` are already declared (as const)
    // earlier in this same scope, so the two declarations below are redeclarations.
    bimap_reference::left_iterator left_iter;
    ia >> left_iter;
    bimap_reference::right_iterator right_iter;
    ia >> right_iter;
    std::cout<<"# done" << std::endl;
    ifs.close();
    return 0;
}

How can I do it?

EDIT_1:

Showing the compiler and linker settings in Eclipse (because I get some errors with the answer code provided by @sehe).

enter image description here

enter image description here

enter image description here

EDIT_2

Command used on linux terminal g++ -std=c++14 -Os -Wall -pedantic -pthread main.cpp -lboost_serialization && ./a.out

Errors I get with the first answer code when tried in linux terminal and eclipse IDE.
/tmp/cc71hTC2.o: In function `boost::archive::detail::common_iarchive<boost::archive::binary_iarchive>::vload(boost::archive::class_name_type&)':
main.cpp:(.text._ZN5boost7archive6detail15common_iarchiveINS0_15binary_iarchiveEE5vloadERNS0_15class_name_typeE[_ZN5boost7archive6detail15common_iarchiveINS0_15binary_iarchiveEE5vloadERNS0_15class_name_typeE]+0x1): undefined reference to `boost::archive::basic_binary_iarchive<boost::archive::binary_iarchive>::load_override(boost::archive::class_name_type&)'
/tmp/cc71hTC2.o: In function `void boost::serialization::throw_exception<boost::archive::archive_exception>(boost::archive::archive_exception const&)':
main.cpp:(.text._ZN5boost13serialization15throw_exceptionINS_7archive17archive_exceptionEEEvRKT_[_ZN5boost13serialization15throw_exceptionINS_7archive17archive_exceptionEEEvRKT_]+0x1a): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
/tmp/cc71hTC2.o: In function `boost::exception_detail::error_info_injector<boost::archive::archive_exception>::error_info_injector(boost::exception_detail::error_info_injector<boost::archive::archive_exception> const&)':
main.cpp:(.text._ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC2ERKS4_[_ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC5ERKS4_]+0x18): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
/tmp/cc71hTC2.o: In function `boost::exception_detail::error_info_injector<boost::archive::archive_exception>::error_info_injector(boost::exception_detail::error_info_injector<boost::archive::archive_exception> const&)':
main.cpp:(.text._ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC1ERKS4_[_ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC5ERKS4_]+0x19): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
/tmp/cc71hTC2.o: In function `boost::exception_detail::error_info_injector<boost::archive::archive_exception>::error_info_injector(boost::archive::archive_exception const&)':
main.cpp:(.text._ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC2ERKS3_[_ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC5ERKS3_]+0xe): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
/tmp/cc71hTC2.o: In function `boost::exception_detail::error_info_injector<boost::archive::archive_exception>::error_info_injector(boost::archive::archive_exception const&)':
main.cpp:(.text._ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC1ERKS3_[_ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC5ERKS3_]+0x14): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
collect2: error: ld returned 1 exit status

EDIT 3 Using g++ -std=c++14 -Os -Wall -pedantic -pthread main.cpp -lboost_serialization && ldd a.out I get the following info

/tmp/ccBH8KfT.o: In function `boost::archive::detail::common_iarchive<boost::archive::binary_iarchive>::vload(boost::archive::class_name_type&)':
main.cpp:(.text._ZN5boost7archive6detail15common_iarchiveINS0_15binary_iarchiveEE5vloadERNS0_15class_name_typeE[_ZN5boost7archive6detail15common_iarchiveINS0_15binary_iarchiveEE5vloadERNS0_15class_name_typeE]+0x1): undefined reference to `boost::archive::basic_binary_iarchive<boost::archive::binary_iarchive>::load_override(boost::archive::class_name_type&)'
/tmp/ccBH8KfT.o: In function `void boost::serialization::throw_exception<boost::archive::archive_exception>(boost::archive::archive_exception const&)':
main.cpp:(.text._ZN5boost13serialization15throw_exceptionINS_7archive17archive_exceptionEEEvRKT_[_ZN5boost13serialization15throw_exceptionINS_7archive17archive_exceptionEEEvRKT_]+0x1a): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
/tmp/ccBH8KfT.o: In function `boost::exception_detail::error_info_injector<boost::archive::archive_exception>::error_info_injector(boost::exception_detail::error_info_injector<boost::archive::archive_exception> const&)':
main.cpp:(.text._ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC2ERKS4_[_ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC5ERKS4_]+0x18): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
/tmp/ccBH8KfT.o: In function `boost::exception_detail::error_info_injector<boost::archive::archive_exception>::error_info_injector(boost::exception_detail::error_info_injector<boost::archive::archive_exception> const&)':
main.cpp:(.text._ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC1ERKS4_[_ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC5ERKS4_]+0x19): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
/tmp/ccBH8KfT.o: In function `boost::exception_detail::error_info_injector<boost::archive::archive_exception>::error_info_injector(boost::archive::archive_exception const&)':
main.cpp:(.text._ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC2ERKS3_[_ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC5ERKS3_]+0xe): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'
/tmp/ccBH8KfT.o: In function `boost::exception_detail::error_info_injector<boost::archive::archive_exception>::error_info_injector(boost::archive::archive_exception const&)':
main.cpp:(.text._ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC1ERKS3_[_ZN5boost16exception_detail19error_info_injectorINS_7archive17archive_exceptionEEC5ERKS3_]+0x14): undefined reference to `boost::archive::archive_exception::archive_exception(boost::archive::archive_exception const&)'

Solution

    1. Wow. You're not aiming for performance with that hash function.

      • you're copying all the blocks on every key/value hash (e.g. on lookup, on insert)
      • you better never wish to use co-routines because that thread-local static will make your life miserable

      See my BONUS section below

    2. Why this awkward dance (simplified code):

      auto iter = index.begin();
      
      // first left element of bimap
      BS first_left = iter->left;
      Index::left_iterator left_iter = index.left.find(first_left);
      

      What is wrong with

      auto left_iter = index.left.begin();
      
    3. What do you think is the validity of an iterator when serialized? (See Iterator invalidation rules)

      oa << left_iter;
      

      I think loading a new datastructure from storage counts as "reallocation". Iterators or references to another datastructure are obviously meaningless here.

    4. Erm. Now it's really getting confusing.

      //  first right element of bimap
      auto pos = index.left.find(first_left);
      Index::right_iterator right_iter = index.right.find(pos->second);
      

      You call it the "first right element", but you do something ELSE: you find the iterator corresponding to the first_left key (which may well be the last element on the right). Also note that since the right-hand side of the bimap is a multiset_of, there might be multiple matches, and you arbitrarily use the first one found.

      (Side note: pos is a useless duplication of left_iter's value)

    5. See 3.

      oa << right_iter;
      
    6. Varia:

      • make sure you open the files as binary

        std::ofstream ofs("binaryfile", std::ios::binary);
        std::ifstream ifs("binaryfile", std::ios::binary);
        
      • why do you name a container with value-semantics bimap_reference? That's just unnecessarily confusing

      • SerializableType is unused
      • BOOST_SERIALIZATION_NVP is meaningless for binary archives (nodes have no names in those)

    The Real Question

    I suppose, the real question might have been "how do I serialize the Bitsets?". I'm happy to inform you I wrote the required bits in 2015: How to serialize boost::dynamic_bitset? and the pull request has been accepted into Boost starting with version 1.64.

    So, you can sit back, sip your tea and include:

    #include <boost/dynamic_bitset/serialization.hpp>
    

    All done.

    The BONUS Section

    Since that serialization achieves a minimal-copy serialization, why not use it to power the hash function? The serialization mechanism will provide you the required private access.

    I've abused serialization plumbing for hash<> specializations before: Hash an arbitrary precision value (boost::multiprecision::cpp_int)

    Putting It All Together

    Live On Coliru

    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/bimap.hpp>
    #include <boost/bimap/unordered_multiset_of.hpp>
    #include <boost/bimap/unordered_set_of.hpp>
    #include <boost/dynamic_bitset/serialization.hpp>
    #include <fstream>
    #include <iostream>
    #include <string>
    
    #include <boost/iostreams/device/back_inserter.hpp>
    #include <boost/iostreams/stream_buffer.hpp>
    #include <boost/iostreams/stream.hpp>
    
    #include <boost/functional/hash.hpp>
    
    namespace serial_hashing { // see https://stackoverflow.com/questions/30097385/hash-an-arbitrary-precision-value-boostmultiprecisioncpp-int
        namespace io = boost::iostreams;
    
        struct hash_sink {
            hash_sink(size_t& seed_ref) : _ptr(&seed_ref) {}
    
            typedef char         char_type;
            typedef io::sink_tag category;
    
            std::streamsize write(const char* s, std::streamsize n) {
                boost::hash_combine(*_ptr, boost::hash_range(s, s+n));
                return n;
            }
          private:
            size_t* _ptr;
        };
    
        template <typename T> struct hash_impl {
            size_t operator()(T const& v) const {
                using namespace boost;
                size_t seed = 0;
                {
                    iostreams::stream<hash_sink> os(seed);
                    archive::binary_oarchive oa(os, archive::no_header | archive::no_codecvt);
                    oa << v;
                }
                return seed;
            }
        };
    }
    
    namespace std {
        /// std::hash for boost::dynamic_bitset; the call operator is inherited
        /// from serial_hashing::hash_impl.
        template <typename Block, typename Alloc>
        struct hash<boost::dynamic_bitset<Block, Alloc> >
            : serial_hashing::hash_impl<boost::dynamic_bitset<Block, Alloc> > {
        };
    } // namespace std
    
    namespace bimaps = boost::bimaps;

    // Bitset with the default block type and allocator.
    using Bitset = boost::dynamic_bitset<>;

    // Hashed bimap: unordered set of bitsets on the left, unordered multiset
    // of bitsets on the right, both using std::hash<Bitset>.
    using Index = boost::bimap<
        bimaps::unordered_set_of<Bitset, std::hash<Bitset> >,
        bimaps::unordered_multiset_of<Bitset, std::hash<Bitset> > >;
    
    int main() {
        using namespace std::string_literals;
    
        {
            std::cout << "# Writing binary file ... " << std::endl;
            Index index;
            index.insert({Bitset("10010"s), Bitset("1010110110101010101"s)});
    
            std::ofstream ofs("binaryfile", std::ios::binary);
            boost::archive::binary_oarchive oa(ofs);
            oa << index;
        }
    
        {
            std::cout << "# Loading binary file ... " << std::endl;
            std::ifstream ifs("binaryfile", std::ios::binary); // name of loading file
    
            boost::archive::binary_iarchive ia(ifs);
    
            Index index;
            ia >> index;
        }
    }
    

    Prints

    # Writing binary file ... 
    # Loading binary file ... 
    

    No problem.

    POST SCRIPTUM

    Really, save yourself trouble. Since your usage clearly indicates you do not want unordered semantics, just make it ordered:

    Live On Coliru

    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/bimap.hpp>
    #include <boost/bimap/multiset_of.hpp>
    #include <boost/dynamic_bitset/serialization.hpp>
    #include <fstream>
    #include <iostream>
    
    namespace bimaps = boost::bimaps;

    // Bitset with the default block type and allocator.
    using Bitset = boost::dynamic_bitset<>;

    // Ordered bimap: set of bitsets on the left, multiset of bitsets on the right.
    using Index = boost::bimap<bimaps::set_of<Bitset>, bimaps::multiset_of<Bitset>>;
    
    int main() {
        using namespace std::string_literals;
    
        {
            std::cout << "# Writing binary file ... " << std::endl;
            Index index;
            index.insert({Bitset("10010"s), Bitset("1010110110101010101"s)});
    
            std::ofstream ofs("binaryfile", std::ios::binary);
            boost::archive::binary_oarchive oa(ofs);
            oa << index;
        }
    
        {
            std::cout << "# Loading binary file ... " << std::endl;
            std::ifstream ifs("binaryfile", std::ios::binary); // name of loading file
    
            boost::archive::binary_iarchive ia(ifs);
    
            Index index;
            ia >> index;
        }
    }
    

    Down to 36 lines, less than half the code left.