c++lzw

LZW Decompression c++


I'm trying to code a lzw data compressor/decompressor, br/>
So i did make a compressor (who seems to work but maybe not) using this Algorithm but when i try to decompress it i get a weird result not looking at all like the original file ...

I think my mistake is in the way i get and/or use data from files but i dont know for sure...So here is the functions i use to compress and decompress and of course any critic / question is welcome .

EDIT : TO GIVE A MCVE
Input text file getting compressed containing : banana_bandana
Output result by decompression : ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü#


EDIT 2 : All useful function to reproduce the output :

using Encoding = uint16_t;
#define MAX 4096 //2^12
static int code = 0;

string combi (string s , char c ){
    s += c ;
    return s;
}

string findkey(unordered_map<string, int>& Dico , int val ){

    string key ;
    string empty ;
    unordered_map<string, int>::const_iterator it;

    for (it = Dico.begin(); it != Dico.end(); ++it){
        if (it->second == val){
            key = it->first;
            return key;
        }else {return empty;}
    }
}
string findkey2(unordered_map<string, Encoding>& Dico , Encoding val ){

    string key ;
    string empty ;
    unordered_map<string, Encoding>::const_iterator it;

    for (it = Dico.begin(); it != Dico.end(); ++it){
        if (it->second == val){
            key = it->first;
            return key;
        }else {return empty;}
    }
}

void InitDico (unordered_map<string, int>& Dico) {
    Dico.clear();
    string s = "";
    char c;
    for (code = 0; code < 256; code++)
    {
        c = (char)code;
        s += c;
        Dico[s] = code;
        s.clear();
    }

}

void InitDico2 (unordered_map<string, Encoding>& Dico) {
    Dico.clear();
    string s = "";
    char c;
    for (code = 0; code < 256; code++)
    {
        c = (char)code;
        s+= c;
        Encoding sizeplus = Dico.size();
        Dico[s] = sizeplus;
        s.clear();
    }
}


void compress(ifstream &is, ofstream &of){
    unordered_map<string,int> Dico ;
    InitDico(Dico);
    string s = "";
    char c ;

    while(is.get(c)){
        if(Dico.size() == MAX){
            InitDico(Dico);
        }
        if(Dico.count(combi(s,c))){
            s += c;
        }else{
            Dico.insert({(combi(s,c)),code});
            code ++;
            of.write(reinterpret_cast<const char *> (&Dico.at(s)),sizeof(code));
            s = c;
        }
    }
    of.write(reinterpret_cast<const char *> (&Dico.at(s)),sizeof(code));
} 

void compress2(ifstream &is, ofstream &of){
    unordered_map<string,Encoding> Dico ;
    InitDico2(Dico);
    string s = "";
    char c ;
    int max = numeric_limits<Encoding>::max();
    while(is.get(c)){
        if(Dico.size() == max){
            InitDico2(Dico);
        }
        if(Dico.count(combi(s,c))){
            s += c;
        }else{
            Encoding sizeplus = Dico.size();
            Dico[{(combi(s,c))}] = sizeplus;
            of.write(reinterpret_cast<const char *> (&Dico.at(s)),sizeof(Encoding));
            s = c;
        }
    }
    of.write(reinterpret_cast<const char *> (&Dico.at(s)),sizeof(Encoding));
} 

void decompress(ifstream &is, ofstream &of){
    unordered_map<string,int> Dico ;
    InitDico(Dico);
    string s , prevstring;
    char c ;
    int prevcode,currcode ;
    is.read(reinterpret_cast<char *>(&prevcode),sizeof(prevcode));
    s = findkey(Dico,prevcode);
    of.write(reinterpret_cast<const char *> (&s) , sizeof(s));

    while(is.read(reinterpret_cast<char *>(&currcode),sizeof(currcode))){
        s = findkey(Dico,currcode);
        of.write(reinterpret_cast<const char *> (&s) , sizeof(s));
        c =s[0];
        prevstring = findkey(Dico,prevcode);
        Dico.insert({(combi(prevstring,c)),code});
        prevcode = currcode;
    }
}

void decompress2(ifstream &is, ofstream &of){//Decompression using uint16 and another algorithm
    unordered_map<string,Encoding> Dico ;
    InitDico2(Dico);
    Encoding n ;
    is.read(reinterpret_cast<char*>(&n),sizeof(n));
    string v = findkey2(Dico,n);
    string w ;
    string entry;
    of.write(reinterpret_cast<const char *> (&v) , sizeof(v));
    w = v ;
    while(is.read(reinterpret_cast<char *>(&n),sizeof(n))){
        v = findkey2(Dico,n);
        if (Dico.count(v)){
            entry = v ;
        }else{entry = combi(w,w[0]);}
        of.write(reinterpret_cast<const char *> (&entry) , sizeof(entry));
        Encoding sizeplus =  Dico.size();
        Dico[combi(w,entry[0])]=sizeplus;
        w = entry;
    }

}

Solution

  • One problem I see is when you're writing out your decompressed data to the file, you write the string object, and not the string data contained within the object. To do that, you need to get to the data the object holds. In decompress, replace both of your writes with

    of.write(s.c_str(), s.length());
    

    A similar change needs to be made in decompress2 (twice).