csegmentation-fault

how to find the root of segmentation fault in C code?


The below code runs smoothly if clean() is called without the 32 times loop but with this loop it hits a segmentation fault and i can't seem to find out why and where, So i ran it in valgrind and on the 17th call of clean() i got this:

    ==133746== Invalid read of size 8
    ==133746==    at 0x10979F: construct (in /home/alexio/gent)
    ==133746==    by 0x109CB1: dynamic (in /home/alexio/gent)
    ==133746==    by 0x10A0FA: infgen (in /home/alexio/gent)
    ==133746==    by 0x10A1C3: clean (in /home/alexio/gent)
    ==133746==    by 0x10A226: main (in /home/alexio/gent)
    ==133746==  Address 0x1001ffefff568 is not stack'd, malloc'd or (recently) free'd
    ==133746== 
    ==133746== 
    ==133746== Process terminating with default action of signal 11 (SIGSEGV): dumping core

And my code:

#define IG_ERRS (sizeof(inferr)/sizeof(char *))
#define IG_INCOMPLETE 1
#define IG_OK 0
#define IG_BLOCK_TYPE_ERR -1
#define IG_STORED_LENGTH_ERR -2
#define IG_TOO_MANY_CODES_ERR -3
#define IG_CODE_LENGTHS_CODE_OVER_ERR -4
#define IG_CODE_LENGTHS_CODE_UNDER_ERR -5
#define IG_REPEAT_NO_FIRST_ERR -6
#define IG_REPEAT_TOO_MANY_ERR -7
#define IG_LITLEN_CODE_OVER_ERR -8
#define IG_LITLEN_CODE_UNDER_ERR -9
#define IG_DIST_CODE_OVER_ERR -10
#define IG_DIST_CODE_UNDER_ERR -11
#define IG_NO_END_CODE_ERR -12
#define IG_BAD_CODE_ERR -13
#define MAXBITS 15              // maximum bits in a code
#define MAXLCODES 286           // maximum number of literal/length codes
#define MAXDCODES 30            // maximum number of distance codes
#define MAXCODES (MAXLCODES+MAXDCODES)  // maximum codes lengths to read
#define FIXLCODES 288           // number of fixed literal/length codes
#define MAXDIST 32768           // maximum match distance
#define MAXSEQS 20  
#define LINELEN 79      // target line length for data and literal commands
#define SEQCOL 24       // column in which to start bit sequence comments

struct state {
    // Flags and settings (output-related fields removed).
    int info;
    int data;
    int tree;
    int draw;
    unsigned max;
    unsigned win;
    // Input data buffer.
    const uint8_t *in;     // Input data array
    size_t in_size;             // Size of the input data array
    size_t in_pos;              // Current read position in input data array
    // Bit and chunk processing.
    int bitcnt;
    int bitbuf;
    int seqs;
    short seq[MAXSEQS];
    char len[MAXSEQS];
    // Block and total statistics.
    unsigned reach;
    unsigned headlen;
    uintmax_t blockin;
    uintmax_t blockout;
    uintmax_t symbols;
    uintmax_t matches;
    uintmax_t matchlen;
    uintmax_t litbits;
    uintmax_t matbits;
    uintmax_t blocks;
    uintmax_t inbits;
    uintmax_t outbytes;
    uintmax_t symbnum;
    uintmax_t matchnum;
    uintmax_t matchtot;
    uintmax_t littot;
};

int get(struct state *s) {
    uint8_t v;
    if (s->in_pos >= s->in_size){
        s->in_pos = 0;
         return -1;  // End of input array
     }
    v = s->in[s->in_pos];
    s->in_pos++;
    return v;  // Return next byte and increment position
}

int bits(struct state *s, int need) {
    long val = s->bitbuf;
    while (s->bitcnt < need) {
        int next = get(s);
        if (next == -1)
            return -1;  // out of input
        val |= (long)(next) << s->bitcnt;  // load eight bits
        s->bitcnt += 8;
    }
    s->bitbuf = (int)(val >> need);
    s->bitcnt -= need;
    s->blockin += need;
    val &= (1L << need) - 1;
    if (s->draw > 1 && s->seqs < MAXSEQS) {
        s->seq[s->seqs] = val;
        s->len[s->seqs] = need;
        s->seqs++;
    }
    return (int)val;
}

void end(struct state *s) {
    s->blocks++;
    s->inbits += s->blockin;
    s->outbytes += s->blockout;
    s->symbnum += s->symbols;
}

struct huffman {
    short *count;       // number of symbols of each length
    short *symbol;      // canonically ordered symbols
};

int decode(struct state *s, struct huffman *h) {
    int bitbuf = s->bitbuf;     // bits to decode from the input
    int left = s->bitcnt;       // number of bits in bitbuf
    int len = 1;                // length of code in consideration
    int code = 0;               // len bits pulled from bitbuf
    int first = 0;              // first code of length len
    int index = 0;              // index of that code in the symbol table
    short *next = h->count + 1; // pointer to number of codes of next length
    for (;;) {
        while (left--) {
            code |= bitbuf & 1;
            bitbuf >>= 1;
            int count = *next++;
            if (code < first + count) {
                // This code is length len. Save bit sequence.
                if (s->draw > 1 && s->seqs < MAXSEQS) {
                    // Reverse the code for showing in the comment.
                    int rev = 0;
                    for (int i = 0; i < len; i++)
                        rev = (rev << 1) | ((code >> i) & 1);
                    s->seq[s->seqs] = rev;
                    s->len[s->seqs] = len;
                    s->seqs++;
                }
                s->bitbuf = bitbuf;
                s->bitcnt = (s->bitcnt - len) & 7;
                s->blockin += len;
                return h->symbol[index + (code - first)];
            }
            // Update to find a code of the next length.
            index += count;
            first += count;
            first <<= 1;
            code <<= 1;
            len++;
        }
        // Need to load more bits from the input into bitbuf.
        left = (MAXBITS+1) - len;
        if (left == 0)
            break;
        bitbuf = get(s);
        if (bitbuf == -1)
            return -1;         // out of input
        if (left > 8)
            left = 8;
    }
    return IG_BAD_CODE_ERR;             // ran out of codes
}

int construct(struct huffman *h, short *length, int n) {
    // Count the number of codes of each length.
    for (int len = 0; len <= MAXBITS; len++)
        h->count[len] = 0;
    for (int symbol = 0; symbol < n; symbol++)
        (h->count[length[symbol]])++;   // assumes lengths are within bounds
    if (h->count[0] == n)               // no codes!
        return 0;                       // complete, but decode() will fail
    // Check for an over-subscribed or incomplete set of lengths.
    int left = 1;                       // one possible code of zero length
    for (int len = 1; len <= MAXBITS; len++) {
        left <<= 1;                     // one more bit, double codes left
        left -= h->count[len];          // deduct count from possible codes
        if (left < 0){
            return left; 
            }               // over-subscribed--return negative
    }                                   // left > 0 means incomplete
    // Generate offsets into symbol table for each length for sorting.
    short offs[MAXBITS+1];      // offsets in symbol table for each length
    offs[1] = 0;
    for (int len = 1; len < MAXBITS; len++)
        offs[len + 1] = offs[len] + h->count[len];
    for (int symbol = 0; symbol < n; symbol++)
        if (length[symbol] != 0)
            h->symbol[offs[length[symbol]]++] = symbol;
    // Return zero for complete set, positive for incomplete set.
    return left;
}
// Decode literal/length and distance codes until an end-of-block code.
int codes(struct state *s,struct huffman *lencode,struct huffman *distcode, int *found, int *trak) {
    static const short lens[29] = { // size base for length codes 257..285
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
    static const short lext[29] = { // extra bits for length codes 257..285
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
    static const short dists[30] = { // offset base for distance codes 0..29
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
        8193, 12289, 16385, 24577};
    static const short dext[30] = { // extra bits for distance codes 0..29
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
        12, 12, 13, 13};
    // Decode literals and length/distance pairs.
    int symbol;
    do {
        uintmax_t beg = s->blockin;
        symbol = decode(s, lencode);
        s->symbols++;        
        if (symbol < 0)
            return symbol;              // invalid symbol
        if (symbol < 256) {             // literal: symbol is the byte
            // Write out the literal.
            if (symbol == 77 && *trak ==0){
                *trak= *trak+1;
            }
            if(symbol == 73 && *trak == 1){
                *trak = *trak+1;
            } 
            if (symbol == 77 && *trak ==2){
                *trak = *trak+1;
            }
            if(symbol == 69 && *trak == 3){
                *trak++;
                *found = 1;
                return IG_OK;
            } 
            s->blockout += 1;
            if (s->max < s->win)
                s->max++;
            s->litbits += s->blockin - beg;
        }
        else if (symbol > 256) {        // length
            // Get and compute length.
            if (symbol >= MAXLCODES)
                return IG_BAD_CODE_ERR;         // invalid fixed code
            symbol -= 257;
            int len = lens[symbol] + bits(s, lext[symbol]);
            // Get distance.
            symbol = decode(s, distcode);
            if (symbol < 0)
                return symbol;                  // invalid symbol
            unsigned dist = dists[symbol] + bits(s, dext[symbol]);
            if (dist > s->max) {
                s->max = MAXDIST;       // issue warning only once
            }
            // Update state for match.
            if (dist > s->blockout) {
                dist -= s->blockout;
                if (dist > s->reach)
                    s->reach = dist;
            }
            s->blockout += len;
            s->matches++;
            s->matchlen += len;
            if (s->max < s->win) {
                if (len > (int)(s->win - s->max))
                    s->max = s->win;
                else
                    s->max += len;
            }
            s->matbits += s->blockin - beg;
        }
    } while (symbol != 256);            // end of block symbol
    s->symbols--;
    // Done with a valid fixed or dynamic block.
    return IG_OK;
}
// Process a dynamic codes block.
int dynamic(struct state *s, int *found, int *trak) {
    // Get number of lengths in each table, check lengths.
    int nlen = bits(s, 5) + 257;
    int ndist = bits(s, 5) + 1;
    int ncode = bits(s, 4) + 4;
    if (nlen > MAXLCODES || ndist > MAXDCODES){
        return IG_TOO_MANY_CODES_ERR;       // bad counts
    }
    // Read code length code lengths (really), missing lengths are zero.
    short lengths[MAXCODES];            // descriptor code lengths
    static const short order[19] =      // permutation of code length codes
        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
    int index;
    for (index = 0; index < ncode; index++) {
        int len = bits(s, 3);
        lengths[order[index]] = len;
    }
    for (; index < 19; index++)
        lengths[order[index]] = 0;
    // Build huffman table for code lengths codes (use lencode temporarily).
    short lencnt[MAXBITS+1], lensym[MAXLCODES];         // lencode memory
    struct huffman lencode = {lencnt, lensym};          // length code
    int err = construct(&lencode, lengths, 19);
    if (err < 0)
        return IG_CODE_LENGTHS_CODE_OVER_ERR;   // oversubscribed
    else if (err > 0)
        return IG_CODE_LENGTHS_CODE_UNDER_ERR;  // incomplete
    // Read length/literal and distance code length tables.
    index = 0;
    while (index < nlen + ndist) {
        int symbol = decode(s, &lencode);
        if (symbol < 0)
            return symbol;              // invalid symbol
        if (symbol < 16) {              // length in 0..15
            lengths[index++] = symbol;
        }
        else {                          // repeat instruction
            int len = -1;               // assume repeating zeros
            if (symbol == 16) {         // repeat last length 3..6 times
                if (index == 0)
                    return IG_REPEAT_NO_FIRST_ERR;  // no last length!
                len = lengths[index - 1];           // last length
                symbol = 3 + bits(s, 2);
            }
            else if (symbol == 17)      // repeat zero 3..10 times
                symbol = 3 + bits(s, 3);
            else                        // == 18, repeat zero 11..138 times
                symbol = 11 + bits(s, 7);
            if (index + symbol > nlen + ndist)
                return IG_REPEAT_TOO_MANY_ERR;  // too many lengths!
            if (len == -1)
                len = 0;
            while (symbol--)            // repeat last or zero symbol times
                lengths[index++] = len;
        }
    }
    if (lengths[256] == 0)
        return IG_NO_END_CODE_ERR;
    err = construct(&lencode, lengths, nlen);
    if (err < 0)
        return IG_LITLEN_CODE_OVER_ERR;
    else if (err > 0 && nlen - lencode.count[0] != 1)
        return IG_LITLEN_CODE_UNDER_ERR;    // incomplete with one code ok
    // Build huffman table for distance codes.
    short distcnt[MAXBITS+1], distsym[MAXDCODES];       // distcode memory
    struct huffman distcode = {distcnt, distsym};       // distance code
    err = construct(&distcode, lengths + nlen, ndist);
    if (err < 0)
        return IG_DIST_CODE_OVER_ERR;
    else if (err > 0 && ndist - distcode.count[0] != 1)
        return IG_DIST_CODE_UNDER_ERR;      // incomplete with one code ok
    // Decode data until end-of-block code.
    s->headlen = s->blockin;
    printf("we are here\n");
    return codes(s, &lencode, &distcode, found, trak);
}

int infgen(struct state *s, int *found, int *trak) {
    // Initialize input state.
    s->bitcnt = 0;
    s->bitbuf = 0;
    s->seqs = 0;
    s->max = 0;
    // Initialize statistics.
    s->blocks = 0;
    s->inbits = 0;
    s->outbytes = 0;
    s->symbnum = 0;
    s->matchnum = 0;
    s->matchtot = 0;
    s->littot = 0;
    // Return if bits() or decode() tries to read past available input.
    int err = 0;
    if (*found != 0) {  // if came back here via longjmp()
        err = IG_INCOMPLETE;     // then skip do-loop, return error
        return -1;
    } else {
        // Process blocks until last block or error.
        int last;
        s->reach = 0;
        s->blockin = 0;
        s->blockout = 0;
        s->symbols = 0;
        s->matches = 0;
        s->matchlen = 0;
        s->litbits = 0;
        s->matbits = 0;

        last = bits(s, 1);  // one if last block
        printf("last is %d\n",last);
        if(last > 0){
            return -4;
        }
        int type = bits(s, 2);  // block type 0..3
        if(type == 2){
            err = dynamic(s,found, trak);
            if(err == 0){
                return 0;
            }
        }else{
            return err;
        }
    }
    
}

void clean(uint8_t *input, size_t length, int *found){
    struct state s;
    s.info = 0;
    s.data = 1;
    s.tree = 1;
    s.draw = 0;
    s.win = MAXDIST;
    s.in_pos = 0;
    // Set input to the provided byte array
    s.in = input;
    s.in_size = length; // Assuming you have a way to track the length of the input array
    // Process the deflate data
    int trak=0;
    int ret;
    // Handle deflate data processing
    printf("there you go\n");
    ret = infgen(&s,found,&trak);
    *found =ret;    
}

int main(){
    uint8_t data[5];
    data[1] = 0x9A;
    data[2] = 0xC7;
    data[3] = 0x92;
    data[4] = 0x12;
    int ret =0;
    uint64_t red=0;
    
    for(int i =0; i < 32; i++){
        data[0] = (i<<3)| 4;                
        clean(data,5,&ret);
        if(ret != -1 && ret !=0){
            red++;
        }
        ret=0;
    }   
    printf("Count of non -1 results: %u\n", red);
    return 0;   
}

This loop seems to be responsible but its not clear how:

for (int symbol = 0; symbol < n; symbol++)
        if (length[symbol] != 0)
            h->symbol[offs[length[symbol]]++] = symbol;

This code is a test to see if one can obtain a combination bytes for a deflate header while assuming other information like block type which in this case limits the first byte to only 32 possibilities and also i wanted to see if the decompression code can handle being called multiple times.


Solution

  • You need to learn one thing - the programs do not debug by looking at them for a long time. To debug you need to put some effort.

    Add assertions to check if you pass valid pointers, if you do not access the arrays outside the bounds, initialize all pointers to NULL and when you stop using it set to NULL.

    Also add many printfs to see if the variables are having expected values.

    Also Enable all possible warnings and do not ignore them (you can force yourself by treating warnings as errors in GCC by using -Werror command line switch)

    It requires some work but you will find the problem in no time.