The below code runs smoothly if clean() is called without the 32 times loop but with this loop it hits a segmentation fault and i can't seem to find out why and where, So i ran it in valgrind
and on the 17th call of clean() i got this:
==133746== Invalid read of size 8
==133746== at 0x10979F: construct (in /home/alexio/gent)
==133746== by 0x109CB1: dynamic (in /home/alexio/gent)
==133746== by 0x10A0FA: infgen (in /home/alexio/gent)
==133746== by 0x10A1C3: clean (in /home/alexio/gent)
==133746== by 0x10A226: main (in /home/alexio/gent)
==133746== Address 0x1001ffefff568 is not stack'd, malloc'd or (recently) free'd
==133746==
==133746==
==133746== Process terminating with default action of signal 11 (SIGSEGV): dumping core
And my code:
#define IG_ERRS (sizeof(inferr)/sizeof(char *))
#define IG_INCOMPLETE 1
#define IG_OK 0
#define IG_BLOCK_TYPE_ERR -1
#define IG_STORED_LENGTH_ERR -2
#define IG_TOO_MANY_CODES_ERR -3
#define IG_CODE_LENGTHS_CODE_OVER_ERR -4
#define IG_CODE_LENGTHS_CODE_UNDER_ERR -5
#define IG_REPEAT_NO_FIRST_ERR -6
#define IG_REPEAT_TOO_MANY_ERR -7
#define IG_LITLEN_CODE_OVER_ERR -8
#define IG_LITLEN_CODE_UNDER_ERR -9
#define IG_DIST_CODE_OVER_ERR -10
#define IG_DIST_CODE_UNDER_ERR -11
#define IG_NO_END_CODE_ERR -12
#define IG_BAD_CODE_ERR -13
#define MAXBITS 15 // maximum bits in a code
#define MAXLCODES 286 // maximum number of literal/length codes
#define MAXDCODES 30 // maximum number of distance codes
#define MAXCODES (MAXLCODES+MAXDCODES) // maximum codes lengths to read
#define FIXLCODES 288 // number of fixed literal/length codes
#define MAXDIST 32768 // maximum match distance
#define MAXSEQS 20
#define LINELEN 79 // target line length for data and literal commands
#define SEQCOL 24 // column in which to start bit sequence comments
struct state {
// Flags and settings (output-related fields removed).
int info;
int data;
int tree;
int draw;
unsigned max;
unsigned win;
// Input data buffer.
const uint8_t *in; // Input data array
size_t in_size; // Size of the input data array
size_t in_pos; // Current read position in input data array
// Bit and chunk processing.
int bitcnt;
int bitbuf;
int seqs;
short seq[MAXSEQS];
char len[MAXSEQS];
// Block and total statistics.
unsigned reach;
unsigned headlen;
uintmax_t blockin;
uintmax_t blockout;
uintmax_t symbols;
uintmax_t matches;
uintmax_t matchlen;
uintmax_t litbits;
uintmax_t matbits;
uintmax_t blocks;
uintmax_t inbits;
uintmax_t outbytes;
uintmax_t symbnum;
uintmax_t matchnum;
uintmax_t matchtot;
uintmax_t littot;
};
int get(struct state *s) {
uint8_t v;
if (s->in_pos >= s->in_size){
s->in_pos = 0;
return -1; // End of input array
}
v = s->in[s->in_pos];
s->in_pos++;
return v; // Return next byte and increment position
}
int bits(struct state *s, int need) {
long val = s->bitbuf;
while (s->bitcnt < need) {
int next = get(s);
if (next == -1)
return -1; // out of input
val |= (long)(next) << s->bitcnt; // load eight bits
s->bitcnt += 8;
}
s->bitbuf = (int)(val >> need);
s->bitcnt -= need;
s->blockin += need;
val &= (1L << need) - 1;
if (s->draw > 1 && s->seqs < MAXSEQS) {
s->seq[s->seqs] = val;
s->len[s->seqs] = need;
s->seqs++;
}
return (int)val;
}
void end(struct state *s) {
s->blocks++;
s->inbits += s->blockin;
s->outbytes += s->blockout;
s->symbnum += s->symbols;
}
struct huffman {
short *count; // number of symbols of each length
short *symbol; // canonically ordered symbols
};
int decode(struct state *s, struct huffman *h) {
int bitbuf = s->bitbuf; // bits to decode from the input
int left = s->bitcnt; // number of bits in bitbuf
int len = 1; // length of code in consideration
int code = 0; // len bits pulled from bitbuf
int first = 0; // first code of length len
int index = 0; // index of that code in the symbol table
short *next = h->count + 1; // pointer to number of codes of next length
for (;;) {
while (left--) {
code |= bitbuf & 1;
bitbuf >>= 1;
int count = *next++;
if (code < first + count) {
// This code is length len. Save bit sequence.
if (s->draw > 1 && s->seqs < MAXSEQS) {
// Reverse the code for showing in the comment.
int rev = 0;
for (int i = 0; i < len; i++)
rev = (rev << 1) | ((code >> i) & 1);
s->seq[s->seqs] = rev;
s->len[s->seqs] = len;
s->seqs++;
}
s->bitbuf = bitbuf;
s->bitcnt = (s->bitcnt - len) & 7;
s->blockin += len;
return h->symbol[index + (code - first)];
}
// Update to find a code of the next length.
index += count;
first += count;
first <<= 1;
code <<= 1;
len++;
}
// Need to load more bits from the input into bitbuf.
left = (MAXBITS+1) - len;
if (left == 0)
break;
bitbuf = get(s);
if (bitbuf == -1)
return -1; // out of input
if (left > 8)
left = 8;
}
return IG_BAD_CODE_ERR; // ran out of codes
}
int construct(struct huffman *h, short *length, int n) {
// Count the number of codes of each length.
for (int len = 0; len <= MAXBITS; len++)
h->count[len] = 0;
for (int symbol = 0; symbol < n; symbol++)
(h->count[length[symbol]])++; // assumes lengths are within bounds
if (h->count[0] == n) // no codes!
return 0; // complete, but decode() will fail
// Check for an over-subscribed or incomplete set of lengths.
int left = 1; // one possible code of zero length
for (int len = 1; len <= MAXBITS; len++) {
left <<= 1; // one more bit, double codes left
left -= h->count[len]; // deduct count from possible codes
if (left < 0){
return left;
} // over-subscribed--return negative
} // left > 0 means incomplete
// Generate offsets into symbol table for each length for sorting.
short offs[MAXBITS+1]; // offsets in symbol table for each length
offs[1] = 0;
for (int len = 1; len < MAXBITS; len++)
offs[len + 1] = offs[len] + h->count[len];
for (int symbol = 0; symbol < n; symbol++)
if (length[symbol] != 0)
h->symbol[offs[length[symbol]]++] = symbol;
// Return zero for complete set, positive for incomplete set.
return left;
}
// Decode literal/length and distance codes until an end-of-block code.
int codes(struct state *s,struct huffman *lencode,struct huffman *distcode, int *found, int *trak) {
static const short lens[29] = { // size base for length codes 257..285
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
static const short lext[29] = { // extra bits for length codes 257..285
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
static const short dists[30] = { // offset base for distance codes 0..29
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
8193, 12289, 16385, 24577};
static const short dext[30] = { // extra bits for distance codes 0..29
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13};
// Decode literals and length/distance pairs.
int symbol;
do {
uintmax_t beg = s->blockin;
symbol = decode(s, lencode);
s->symbols++;
if (symbol < 0)
return symbol; // invalid symbol
if (symbol < 256) { // literal: symbol is the byte
// Write out the literal.
if (symbol == 77 && *trak ==0){
*trak= *trak+1;
}
if(symbol == 73 && *trak == 1){
*trak = *trak+1;
}
if (symbol == 77 && *trak ==2){
*trak = *trak+1;
}
if(symbol == 69 && *trak == 3){
*trak++;
*found = 1;
return IG_OK;
}
s->blockout += 1;
if (s->max < s->win)
s->max++;
s->litbits += s->blockin - beg;
}
else if (symbol > 256) { // length
// Get and compute length.
if (symbol >= MAXLCODES)
return IG_BAD_CODE_ERR; // invalid fixed code
symbol -= 257;
int len = lens[symbol] + bits(s, lext[symbol]);
// Get distance.
symbol = decode(s, distcode);
if (symbol < 0)
return symbol; // invalid symbol
unsigned dist = dists[symbol] + bits(s, dext[symbol]);
if (dist > s->max) {
s->max = MAXDIST; // issue warning only once
}
// Update state for match.
if (dist > s->blockout) {
dist -= s->blockout;
if (dist > s->reach)
s->reach = dist;
}
s->blockout += len;
s->matches++;
s->matchlen += len;
if (s->max < s->win) {
if (len > (int)(s->win - s->max))
s->max = s->win;
else
s->max += len;
}
s->matbits += s->blockin - beg;
}
} while (symbol != 256); // end of block symbol
s->symbols--;
// Done with a valid fixed or dynamic block.
return IG_OK;
}
// Process a dynamic codes block.
int dynamic(struct state *s, int *found, int *trak) {
// Get number of lengths in each table, check lengths.
int nlen = bits(s, 5) + 257;
int ndist = bits(s, 5) + 1;
int ncode = bits(s, 4) + 4;
if (nlen > MAXLCODES || ndist > MAXDCODES){
return IG_TOO_MANY_CODES_ERR; // bad counts
}
// Read code length code lengths (really), missing lengths are zero.
short lengths[MAXCODES]; // descriptor code lengths
static const short order[19] = // permutation of code length codes
{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
int index;
for (index = 0; index < ncode; index++) {
int len = bits(s, 3);
lengths[order[index]] = len;
}
for (; index < 19; index++)
lengths[order[index]] = 0;
// Build huffman table for code lengths codes (use lencode temporarily).
short lencnt[MAXBITS+1], lensym[MAXLCODES]; // lencode memory
struct huffman lencode = {lencnt, lensym}; // length code
int err = construct(&lencode, lengths, 19);
if (err < 0)
return IG_CODE_LENGTHS_CODE_OVER_ERR; // oversubscribed
else if (err > 0)
return IG_CODE_LENGTHS_CODE_UNDER_ERR; // incomplete
// Read length/literal and distance code length tables.
index = 0;
while (index < nlen + ndist) {
int symbol = decode(s, &lencode);
if (symbol < 0)
return symbol; // invalid symbol
if (symbol < 16) { // length in 0..15
lengths[index++] = symbol;
}
else { // repeat instruction
int len = -1; // assume repeating zeros
if (symbol == 16) { // repeat last length 3..6 times
if (index == 0)
return IG_REPEAT_NO_FIRST_ERR; // no last length!
len = lengths[index - 1]; // last length
symbol = 3 + bits(s, 2);
}
else if (symbol == 17) // repeat zero 3..10 times
symbol = 3 + bits(s, 3);
else // == 18, repeat zero 11..138 times
symbol = 11 + bits(s, 7);
if (index + symbol > nlen + ndist)
return IG_REPEAT_TOO_MANY_ERR; // too many lengths!
if (len == -1)
len = 0;
while (symbol--) // repeat last or zero symbol times
lengths[index++] = len;
}
}
if (lengths[256] == 0)
return IG_NO_END_CODE_ERR;
err = construct(&lencode, lengths, nlen);
if (err < 0)
return IG_LITLEN_CODE_OVER_ERR;
else if (err > 0 && nlen - lencode.count[0] != 1)
return IG_LITLEN_CODE_UNDER_ERR; // incomplete with one code ok
// Build huffman table for distance codes.
short distcnt[MAXBITS+1], distsym[MAXDCODES]; // distcode memory
struct huffman distcode = {distcnt, distsym}; // distance code
err = construct(&distcode, lengths + nlen, ndist);
if (err < 0)
return IG_DIST_CODE_OVER_ERR;
else if (err > 0 && ndist - distcode.count[0] != 1)
return IG_DIST_CODE_UNDER_ERR; // incomplete with one code ok
// Decode data until end-of-block code.
s->headlen = s->blockin;
printf("we are here\n");
return codes(s, &lencode, &distcode, found, trak);
}
int infgen(struct state *s, int *found, int *trak) {
// Initialize input state.
s->bitcnt = 0;
s->bitbuf = 0;
s->seqs = 0;
s->max = 0;
// Initialize statistics.
s->blocks = 0;
s->inbits = 0;
s->outbytes = 0;
s->symbnum = 0;
s->matchnum = 0;
s->matchtot = 0;
s->littot = 0;
// Return if bits() or decode() tries to read past available input.
int err = 0;
if (*found != 0) { // if came back here via longjmp()
err = IG_INCOMPLETE; // then skip do-loop, return error
return -1;
} else {
// Process blocks until last block or error.
int last;
s->reach = 0;
s->blockin = 0;
s->blockout = 0;
s->symbols = 0;
s->matches = 0;
s->matchlen = 0;
s->litbits = 0;
s->matbits = 0;
last = bits(s, 1); // one if last block
printf("last is %d\n",last);
if(last > 0){
return -4;
}
int type = bits(s, 2); // block type 0..3
if(type == 2){
err = dynamic(s,found, trak);
if(err == 0){
return 0;
}
}else{
return err;
}
}
}
void clean(uint8_t *input, size_t length, int *found){
struct state s;
s.info = 0;
s.data = 1;
s.tree = 1;
s.draw = 0;
s.win = MAXDIST;
s.in_pos = 0;
// Set input to the provided byte array
s.in = input;
s.in_size = length; // Assuming you have a way to track the length of the input array
// Process the deflate data
int trak=0;
int ret;
// Handle deflate data processing
printf("there you go\n");
ret = infgen(&s,found,&trak);
*found =ret;
}
int main(){
uint8_t data[5];
data[1] = 0x9A;
data[2] = 0xC7;
data[3] = 0x92;
data[4] = 0x12;
int ret =0;
uint64_t red=0;
for(int i =0; i < 32; i++){
data[0] = (i<<3)| 4;
clean(data,5,&ret);
if(ret != -1 && ret !=0){
red++;
}
ret=0;
}
printf("Count of non -1 results: %u\n", red);
return 0;
}
This loop seems to be responsible but its not clear how:
for (int symbol = 0; symbol < n; symbol++)
if (length[symbol] != 0)
h->symbol[offs[length[symbol]]++] = symbol;
This code is a test to see if one can obtain a combination bytes for a deflate header while assuming other information like block type which in this case limits the first byte to only 32 possibilities and also i wanted to see if the decompression code can handle being called multiple times.
You need to learn one thing - the programs do not debug by looking at them for a long time. To debug you need to put some effort.
Add assertions to check if you pass valid pointers, if you do not access the arrays outside the bounds, initialize all pointers to NULL and when you stop using it set to NULL.
Also add many printf
s to see if the variables are having expected values.
Also Enable all possible warnings and do not ignore them (you can force yourself by treating warnings as errors in GCC by using -Werror
command line switch)
It requires some work but you will find the problem in no time.