I tried implementing LZW encoding/decoding and ended up with the following code
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
using Index = std::int16_t;
void encode(std::istream &input, std::ostream &output) {
Index index{0};
std::unordered_map<std::string, Index> dictionary{};
for (int i = 0; i < 256; ++i) {
dictionary[{static_cast<char>(i & 0xFF)}] = index++;
}
char k;
input.read(&k, sizeof(char));
std::string buffer{""};
while (input) {
const auto tmp = buffer + k;
if (dictionary.contains(tmp))
buffer = tmp;
else {
dictionary[tmp] = index++;
output.write(reinterpret_cast<const char *>(&dictionary[buffer]),
sizeof(Index));
buffer = {k};
}
input.read(&k, sizeof(char));
}
output.write(reinterpret_cast<const char *>(&dictionary[buffer]),
sizeof(Index));
}
void decode(std::istream &input, std::ostream &output) {
Index index{0};
std::unordered_map<Index, std::string> dictionary{};
for (int i = 0; i < 256; ++i) {
dictionary[index++] = {static_cast<char>(i & 0xFF)};
}
Index k;
input.read(reinterpret_cast<char *>(&k), sizeof(Index));
output << dictionary[k];
Index old{k};
std::string buffer;
while (input) {
input.read(reinterpret_cast<char *>(&k), sizeof(Index));
buffer = dictionary[old];
std::string tmp;
if (dictionary.contains(k)) {
const auto &entry = dictionary[k];
tmp = buffer + entry.front();
output << entry;
} else {
tmp = buffer + buffer.front();
output << tmp;
}
dictionary[index++] = tmp;
old = k;
}
}
It kinda works but the last character after decoding is duplicated:
int main() {
std::string input{"hello world!"};
std::istringstream iss{input};
std::stringstream ss{};
encode(iss, ss);
decode(ss, std::cout);
}
Outputs hello world!!
instead of hello world!
. I can't find my mistake, maybe someone else can?
As @NathanOliver pointed out in the comments changing the decode loop to
while (input.read(reinterpret_cast<char *>(&k), sizeof(Index))) {
fixes it.