c++ serialization boost versioning boost-serialization

How to "forget" object types in boost::serialization


I use Boost to serialize classes I register with register_type, as described here.

Later, if I decide that a specific class is no longer useful and I want to open an old file and discard the instances of the forgotten class, I have no way to do it.

How can I make this work?

Here is an example:

#include <iostream>
#include <sstream>
#include <boost/serialization/serialization.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>

/// Abstract root of the serializable hierarchy; derived types supply display().
struct Base
{
    virtual ~Base() = default;

    /// Writes a human-readable representation of the object to `os`.
    virtual void display(std::ostream &os) const = 0;

    /// Base carries no data, so there is nothing to archive at this level.
    template <class Archive>
    void serialize(Archive & /*ar*/, long int /*version*/)
    {
    }
};

/// Concrete Base that carries two integers.
struct MyType1 : public Base
{
    int i;
    int j;

    MyType1(): i {0}, j {0} {}
    MyType1(int i_, int j_): i {i_}, j {j_} {}
    ~MyType1() override = default;

    /// Archives the Base sub-object first, then i, then j.
    template <class Archive>
    void serialize(Archive &ar, long int /*version*/)
    {
        ar & boost::serialization::base_object<Base>(*this) & i & j;
    }

    /// Prints the two integers in the form MyType1{i, j}.
    void display(std::ostream &os) const override
    {
        os << "MyType1{";
        os << i << ", " << j;
        os << "}";
    }
};

/// Concrete Base that carries a single float.
struct MyType2 : public Base
{
    float a;

    MyType2(): a {0.f} {}
    MyType2(float a_): a {a_} {}
    ~MyType2() override = default;

    /// Archives the Base sub-object first, then the float.
    template <class Archive>
    void serialize(Archive &ar, long int /*version*/)
    {
        ar & boost::serialization::base_object<Base>(*this) & a;
    }

    /// Prints the float in the form MyType2{a}.
    void display(std::ostream &os) const override
    {
        os << "MyType2{";
        os << a;
        os << '}';
    }
};

std::ostream &operator<<(std::ostream &os, Base const &b)
{
    b.display(os);
    return os;
}

// Reproduces the problem: two polymorphic objects are written with both
// derived types registered, then the stream is read back with the
// registration of MyType1 removed.
int main()
{
    std::stringstream stream;

    {
        // Writer side: both derived types are registered, MyType1 first.
        boost::archive::binary_oarchive oar {stream};
        oar.register_type<MyType1>();
        oar.register_type<MyType2>();
        Base *foo1 = new MyType1 {42, 12},
                *foo2 = new MyType2 {32.f};

        // foo1 (a MyType1) is stored first, foo2 (a MyType2) second.
        oar << foo1 << foo2;
        delete foo1;
        delete foo2;
    }

    boost::archive::binary_iarchive iar {stream};
    
    // Remove a type
    //iar.register_type<MyType1>();

    // Reader side: only MyType2 is registered, so the registrations no longer
    // match the ones made by the writer above.
    iar.register_type<MyType2>();
    Base *obj = nullptr;
    // Loads the first stored pointer, which the writer saved from a MyType1.
    iar >> obj;
    // Outputs MyType2{5.88545e-44}
    std::cout << *obj << '\n';
    // NOTE(review): obj is never deleted in this demo.

    return 0;
}

Solution

  • You can forget the type, but then you can obviously no longer read any archives containing the old type. Since that's exactly what you're trying to do, it breaks.

    Simplified example Live On Coliru

    If your archive didn't contain the type, you would still get an exception, because of the way you are registering the types: Live On Coliru throwing an archive exception "unregistered class".

    The way you register types means you have to match the order and number of registrations always.

    Exporting Classes

    Instead, consider using the export mechanism: https://www.boost.org/doc/libs/1_80_0/libs/serialization/doc/special.html#export

    Here's the example adapted: Live On Coliru

    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/serialization/export.hpp>
    #include <boost/serialization/serialization.hpp>
    #include <fstream>
    #include <iostream>
    #include <sstream>
    
    // Polymorphic root type: holds no data, derived types implement display().
    struct Base {
        virtual ~Base() = default;

        // Nothing to archive at this level; both arguments are intentionally unused.
        void serialize(auto& /*ar*/, unsigned /*version*/) {}

        virtual void display(std::ostream& os) const = 0;
    };
    
    // Derived type holding two ints; archived as Base sub-object, then i, then j.
    struct MyType1 : public Base {
        int i;
        int j;

        MyType1(int i_ = 0, int j_ = 0) : i{i_}, j{j_} {}

        void serialize(auto& ar, unsigned /*version*/) {
            ar & boost::serialization::base_object<Base>(*this);
            ar & i;
            ar & j;
        }

        void display(std::ostream& os) const override {
            os << "MyType1{" << i << ", " << j << "}";
        }
    };
    
    // Derived type holding one float; archived as Base sub-object, then a.
    struct MyType2 : public Base {
        float a;

        MyType2(float a_ = 0.f) : a{a_} {}

        void serialize(auto& ar, unsigned /*version*/) {
            ar & boost::serialization::base_object<Base>(*this);
            ar & a;
        }

        void display(std::ostream& os) const override {
            os << "MyType2{" << a << "}";
        }
    };
    
    // Export both derived types so they are identified by name in the archive;
    // no register_type calls are made on the archives in this example.
    BOOST_CLASS_EXPORT(MyType1)
    BOOST_CLASS_EXPORT(MyType2)
    
    static inline std::ostream& operator<<(std::ostream& os, Base const& b) { return b.display(os), os; }
    
    int main() {
        std::stringstream ss;
    
        {
            boost::archive::binary_oarchive oa{ss};
            Base* foo1 = new MyType1{42, 12};
            Base* foo2 = new MyType2{32.f};
    
            oa << foo1 << foo2;
            delete foo1;
            delete foo2;
        }
    
        std::ofstream("output.bin", std::ios::binary) << ss.str();
    
        {
            boost::archive::binary_iarchive ia{ss};
    
            Base* obj1 = nullptr;
            Base* obj2 = nullptr;
            ia >> obj1 >> obj2;
    
            std::cout << *obj1 << "\n";
            std::cout << *obj2 << "\n";
    
            delete obj2;
            delete obj1;
        }
    }
    

    This takes care of unique identification (using the qualified type name). Therefore, when you are ready to drop support for old archives with the old class, you can just omit it, and be happy: Live On Coliru

    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/serialization/export.hpp>
    #include <boost/serialization/serialization.hpp>
    #include <fstream>
    #include <iostream>
    #include <sstream>
    
    // Abstract base of the hierarchy; it has no members to archive.
    struct Base {
        virtual ~Base() = default;

        // Empty on purpose: there is no Base state to read or write.
        void serialize(auto& /*ar*/, unsigned /*version*/) {}

        virtual void display(std::ostream& os) const = 0;
    };
    
    // Two-int payload type; archives the Base part first, then i and j.
    struct MyType1 : public Base {
        int i;
        int j;

        MyType1(int i_ = 0, int j_ = 0) : i{i_}, j{j_} {}

        void serialize(auto& ar, unsigned /*version*/) {
            ar & boost::serialization::base_object<Base>(*this);
            ar & i;
            ar & j;
        }

        void display(std::ostream& os) const override {
            os << "MyType1{" << i << ", " << j << "}";
        }
    };
    
    // Single-float payload type; archives the Base part first, then a.
    struct MyType2 : public Base {
        float a;

        MyType2(float a_ = 0.f) : a{a_} {}

        void serialize(auto& ar, unsigned /*version*/) {
            ar & boost::serialization::base_object<Base>(*this);
            ar & a;
        }

        void display(std::ostream& os) const override {
            os << "MyType2{" << a << "}";
        }
    };
    
    static inline std::ostream& operator<<(std::ostream& os, Base const& b) { return b.display(os), os; }
    
    #ifdef OLD_WRITER
        // Writer build: exports both types, but only stores one MyType2.
        BOOST_CLASS_EXPORT(MyType1)
        BOOST_CLASS_EXPORT(MyType2)

        int main() {
            std::ofstream out_file("output.bin", std::ios::binary);
            boost::archive::binary_oarchive writer{out_file};

            Base* payload = new MyType2{42.f};
            writer << payload;
            delete payload;
        }
    #else
        // forgotten: MyType1
        BOOST_CLASS_EXPORT(MyType2)

        // Reader build: MyType1 is no longer exported; loads the single
        // pointer stored in output.bin and prints it.
        int main() {
            std::ifstream in_file("output.bin", std::ios::binary);
            boost::archive::binary_iarchive reader{in_file};

            Base* loaded = nullptr;
            reader >> loaded;

            std::cout << *loaded << "\n";
            delete loaded;
        }
    #endif
    

    With

    g++ -std=c++20 -O2 -Wall -pedantic -pthread main.cpp -lboost_serialization -DOLD_WRITER -o old
    g++ -std=c++20 -O2 -Wall -pedantic -pthread main.cpp -lboost_serialization -DNEW_READER -o new
    ./old; xxd output.bin; ./new
    

    Prints

    00000000: 1600 0000 0000 0000 7365 7269 616c 697a  ........serializ
    00000010: 6174 696f 6e3a 3a61 7263 6869 7665 1300  ation::archive..
    00000020: 0408 0408 0100 0000 0000 0700 0000 0000  ................
    00000030: 0000 4d79 5479 7065 3201 0000 0000 0000  ..MyType2.......
    00000040: 0000 0000 0000 0000 0028 42              .........(B
    MyType2{42}
    

    Advanced: Versioning

    You can also use the explicit register-type, but you'd have to do versioning to get some kind of compatibility instead of UB.

    You could wrap your archived data in a class that does the registration and is also versioned:

    Live On Coliru: V0

    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/serialization/serialization.hpp>
    #include <boost/serialization/unique_ptr.hpp>
    #include <boost/serialization/vector.hpp>
    #include <fstream>
    #include <iostream>
    #include <sstream>
    
    // Common polymorphic base; stateless, so serialize() has nothing to do.
    struct Base {
        virtual ~Base() = default;

        // Intentionally empty: Base contributes no data to the archive.
        void serialize(auto& /*ar*/, unsigned /*version*/) {}

        virtual void display(std::ostream& os) const = 0;
    };
    
    // Element type with two ints, archived after its Base sub-object.
    struct MyType1 : public Base {
        int i;
        int j;

        MyType1(int i_ = 0, int j_ = 0) : i{i_}, j{j_} {}

        void serialize(auto& ar, unsigned /*version*/) {
            ar & boost::serialization::base_object<Base>(*this);
            ar & i;
            ar & j;
        }

        void display(std::ostream& os) const override {
            os << "MyType1{" << i << ", " << j << "}";
        }
    };
    
    // Element type with one float, archived after its Base sub-object.
    struct MyType2 : public Base {
        float a;

        MyType2(float a_ = 0.f) : a{a_} {}

        void serialize(auto& ar, unsigned /*version*/) {
            ar & boost::serialization::base_object<Base>(*this);
            ar & a;
        }

        void display(std::ostream& os) const override {
            os << "MyType2{" << a << "}";
        }
    };
    
    static inline std::ostream& operator<<(std::ostream& os, Base const& b) { return b.display(os), os; }
    
    struct MyArchiveData {
        std::vector<std::unique_ptr<Base>> data;
    
        void serialize(auto& ar, unsigned version) {
            switch (version) {
            case 0: {
                ar.template register_type<MyType1>();
                ar.template register_type<MyType2>();
                ar& data;
                break;
            }
            default:
                using E = boost::archive::archive_exception;
                throw E(E::exception_code::unsupported_class_version);
            }
        }
    };
    
    int main() {
        {
            std::ofstream ofs("output.bin", std::ios::binary);
            boost::archive::binary_oarchive oa{ofs};
    
            MyArchiveData db;
            db.data.emplace_back(new MyType1(42, 12));
            db.data.emplace_back(new MyType2(32.f));
    
            oa << db;
        }
        {
            std::ifstream ifs("output.bin", std::ios::binary);
            boost::archive::binary_iarchive ia{ifs};
    
            MyArchiveData db;
            ia >> db;
    
            for (auto& el : db.data)
                std::cout << *el << "\n";
        }
    }
    

    Printing

    MyType1{42, 12}
    MyType2{32}
    

    Note how I took the opportunity to get rid of raw pointers.

    Introducing V1 of MyArchiveData

    We declare the new class version:

    // Sets the class version of MyArchiveData to 1 (it is 0 when not declared).
    BOOST_CLASS_VERSION(MyArchiveData, 1)
    

    And implement the new logic:

    // Versioned wrapper around the archived objects: the set of types that gets
    // registered depends on the class version passed to serialize().
    struct MyArchiveData {
        std::vector<std::unique_ptr<Base>> data;
    
        void serialize(auto& ar, unsigned version) {
            switch (version) {
            // Version 0: both types are registered, MyType1 first.
            case 0: {
                ar.template register_type<MyType1>();
                ar.template register_type<MyType2>();
                ar& data;
                break;
            }
    #ifdef V1
            // Version 1 (only compiled when V1 is defined): MyType2 is the only registered type.
            case 1: {
                // MyType1 forgotten!
                ar.template register_type<MyType2>();
                ar& data;
                break;
            }
    #endif
            // Any other version is rejected with an archive_exception.
            default:
                using E = boost::archive::archive_exception;
                throw E(E::exception_code::unsupported_class_version);
            }
        }
    };
    

    See the results Live On Coliru: V1 vs V0

    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/serialization/serialization.hpp>
    #include <boost/serialization/unique_ptr.hpp>
    #include <boost/serialization/vector.hpp>
    #include <boost/serialization/version.hpp>
    #include <fstream>
    #include <iomanip>
    #include <iostream>
    
    // Stateless polymorphic base; concrete element types derive from it.
    struct Base {
        virtual ~Base() = default;

        // No members to archive, hence the empty body and unused arguments.
        void serialize(auto& /*ar*/, unsigned /*version*/) {}

        virtual void display(std::ostream& os) const = 0;
    };
    
    // Holds two ints; serialize() handles the Base part, then i, then j.
    struct MyType1 : public Base {
        int i;
        int j;

        MyType1(int i_ = 0, int j_ = 0) : i{i_}, j{j_} {}

        void serialize(auto& ar, unsigned /*version*/) {
            ar & boost::serialization::base_object<Base>(*this);
            ar & i;
            ar & j;
        }

        void display(std::ostream& os) const override {
            os << "MyType1{" << i << ", " << j << "}";
        }
    };
    
    // Holds one float; serialize() handles the Base part, then a.
    struct MyType2 : public Base {
        float a;

        MyType2(float a_ = 0.f) : a{a_} {}

        void serialize(auto& ar, unsigned /*version*/) {
            ar & boost::serialization::base_object<Base>(*this);
            ar & a;
        }

        void display(std::ostream& os) const override {
            os << "MyType2{" << a << "}";
        }
    };
    
    static inline std::ostream& operator<<(std::ostream& os, Base const& b) { return b.display(os), os; }
    
    // Versioned wrapper around the archived objects: the set of types that gets
    // registered depends on the class version passed to serialize().
    struct MyArchiveData {
        std::vector<std::unique_ptr<Base>> data;
    
        void serialize(auto& ar, unsigned version) {
            switch (version) {
            // Version 0: both types are registered, MyType1 first.
            case 0: {
                ar.template register_type<MyType1>();
                ar.template register_type<MyType2>();
                ar& data;
                break;
            }
    #ifdef V1
            // Version 1 (only compiled when V1 is defined): MyType2 is the only registered type.
            case 1: {
                // MyType1 forgotten!
                ar.template register_type<MyType2>();
                ar& data;
                break;
            }
    #endif
            // Any other version is rejected with a std::runtime_error.
            default:
                throw std::runtime_error("MyArchiveData: version not supported");
            }
        }
    };
    
    // is_V1 tells main() which build this is. The V1 build also raises the
    // class version of MyArchiveData to 1.
    #ifdef V1
        BOOST_CLASS_VERSION(MyArchiveData, 1)
        static constexpr bool is_V1 = true;
    #else
        // default class version is 0
        static constexpr bool is_V1 = false;
    #endif
    
    int main(int argc, char** argv) {
        {
            std::ofstream ofs(is_V1 ? "v1.bin" : "v0.bin", std::ios::binary);
            boost::archive::binary_oarchive oa{ofs};
    
            MyArchiveData db;
            if (!is_V1)
                db.data.emplace_back(new MyType1(42, 12));
            db.data.emplace_back(new MyType2(32.f));
    
            oa << db;
        }
    
        for (auto fname : std::vector(argv + 1, argv + argc)) {
            std::cout << (is_V1?"V1":"V0") << " Reading " << std::quoted(fname) << std::endl;
            std::ifstream ifs(fname, std::ios::binary);
            boost::archive::binary_iarchive ia{ifs};
    
            MyArchiveData db;
            ia >> db;
    
            for (auto& el : db.data)
                std::cout << *el << std::endl;
        }
    }
    

    Testing with

    g++ -std=c++20 -O2 -Wall -pedantic -pthread main.cpp -lboost_serialization -DV0 -o v0
    g++ -std=c++20 -O2 -Wall -pedantic -pthread main.cpp -lboost_serialization -DV1 -o v1
    ./v0 v0.bin
    ./v1 v0.bin v1.bin
    # this should not work:
    ./v0 v1.bin
    

    Output:

    V0 Reading "v0.bin"
    MyType1{42, 12}
    MyType2{32}
    V1 Reading "v0.bin"
    MyType1{42, 12}
    MyType2{32}
    V1 Reading "v1.bin"
    MyType2{32}
    V0 Reading "v1.bin"
    terminate called after throwing an instance of 'std::runtime_error'
      what():  MyArchiveData: version not supported
    

    SUMMARIZING

    You will probably note that the advanced/object versioning approach requires you to keep the implementation of MyType1 around. That's only partially true: you need it only for as long as you want to be able to consume the old archives.

    You could use the techniques shown above to make a conversion tool that converts old archives to the new version.

    Then when you're ready to drop the support for the old stuff and the corresponding class implementation you can drop the MyType1 definition altogether and remove the switch case for the old version(s).