cgzipdeflatepkzip

What is LENGTHS CODE OVER error and why am i getting this?


I am working on an improved program that attempts to recover PKZIP keys with intention of CUDA support, So I integrated a decompression algorithm from infgen to decompress an array of bytes instead of file stream and look for specific byte sequence, in this case

MIME

, if those are found then it means the keys are correct.

This program works under the assumption that:

So for this test I am just reading the file with first 128 bytes without zip headers then try to decompress it and see if it works but unfortunately the Dynamic function returns -4 for some reason I do not understand but if I run ./infgen -dd file with the same file I get to the target bytes

Here is my code:

#define IG_INCOMPLETE 1
#define IG_OK 0
#define IG_BLOCK_TYPE_ERR -1
#define IG_STORED_LENGTH_ERR -2
#define IG_TOO_MANY_CODES_ERR -3
#define IG_CODE_LENGTHS_CODE_OVER_ERR -4
#define IG_CODE_LENGTHS_CODE_UNDER_ERR -5
#define IG_REPEAT_NO_FIRST_ERR -6
#define IG_REPEAT_TOO_MANY_ERR -7
#define IG_LITLEN_CODE_OVER_ERR -8
#define IG_LITLEN_CODE_UNDER_ERR -9
#define IG_DIST_CODE_OVER_ERR -10
#define IG_DIST_CODE_UNDER_ERR -11
#define IG_NO_END_CODE_ERR -12
#define IG_BAD_CODE_ERR -13

struct state {
    int info;
    int data;
    int tree;
    int draw;
    unsigned max;
    unsigned win;
    // Input data buffer.
    const uint8_t *in;     // Input data array
    size_t in_size;             // Size of the input data array
    size_t in_pos;              // Current read position in input data array
    // Bit and chunk processing.
    int bitcnt;
    int bitbuf;
    int seqs;
    short seq[MAXSEQS];
    char len[MAXSEQS];
    // Block and total statistics.
    unsigned reach;
    unsigned headlen;
    uintmax_t blockin;
    uintmax_t blockout;
    uintmax_t symbols;
    uintmax_t matches;
    uintmax_t matchlen;
    uintmax_t litbits;
    uintmax_t matbits;
    uintmax_t blocks;
    uintmax_t inbits;
    uintmax_t outbytes;
    uintmax_t symbnum;
    uintmax_t matchnum;
    uintmax_t matchtot;
    uintmax_t littot;
};

int get(struct state *s) {
    if (s->in_pos >= s->in_size) return -1;  // End of input array
    return s->in[s->in_pos++];           // Return next byte and increment position
}

int bits(struct state *s, int need) {
    long val = s->bitbuf;
    while (s->bitcnt < need) {
        int next = get(s);
        if (next == -1)
            return -1;                 // out of input
        val |= (long)(next) << s->bitcnt;       // load eight bits
        s->bitcnt += 8;
    }
    s->bitbuf = (int)(val >> need);
    s->bitcnt -= need;
    s->blockin += need;
    val &= (1L << need) - 1;
    if (s->draw > 1 && s->seqs < MAXSEQS) {
        s->seq[s->seqs] = val;
        s->len[s->seqs] = need;
        s->seqs++;
    }
    return (int)val;
}

// Show and accumulate statistics at end of block.
void end(struct state *s) {
    s->blocks++;
    s->inbits += s->blockin;
    s->outbytes += s->blockout;
    s->symbnum += s->symbols;
}

struct huffman {
    short *count;       // number of symbols of each length
    short *symbol;      // canonically ordered symbols
};

int decode(struct state *s, struct huffman *h) {
    int bitbuf = s->bitbuf;     // bits to decode from the input
    int left = s->bitcnt;       // number of bits in bitbuf
    int len = 1;                // length of code in consideration
    int code = 0;               // len bits pulled from bitbuf
    int first = 0;              // first code of length len
    int index = 0;              // index of that code in the symbol table
    short *next = h->count + 1; // pointer to number of codes of next length
    for (;;) {
        while (left--) {
            code |= bitbuf & 1;
            bitbuf >>= 1;
            int count = *next++;
            if (code < first + count) {
                if (s->draw > 1 && s->seqs < MAXSEQS) {
                    int rev = 0;
                    for (int i = 0; i < len; i++)
                        rev = (rev << 1) | ((code >> i) & 1);
                    s->seq[s->seqs] = rev;
                    s->len[s->seqs] = len;
                    s->seqs++;
                }
                s->bitbuf = bitbuf;
                s->bitcnt = (s->bitcnt - len) & 7;
                s->blockin += len;
                return h->symbol[index + (code - first)];
            }
            index += count;
            first += count;
            first <<= 1;
            code <<= 1;
            len++;
        }
        left = (MAXBITS+1) - len;
        if (left == 0)
            break;
        bitbuf = get(s);
        if (bitbuf == -1)
            return -1;         // out of input
        if (left > 8)
            left = 8;
    }
    return IG_BAD_CODE_ERR;             // ran out of codes
}

int construct(struct huffman *h, short *length, int n) {
    // Count the number of codes of each length.
    for (int len = 0; len <= MAXBITS; len++)
        h->count[len] = 0;
    for (int symbol = 0; symbol < n; symbol++)
        (h->count[length[symbol]])++;   // assumes lengths are within bounds
    if (h->count[0] == n)               // no codes!
        return 0;                       // complete, but decode() will fail

    // Check for an over-subscribed or incomplete set of lengths.
    int left = 1;                       // one possible code of zero length
    for (int len = 1; len <= MAXBITS; len++) {
        left <<= 1;                     // one more bit, double codes left
        left -= h->count[len];          // deduct count from possible codes
        if (left < 0)
            return left;                // over-subscribed--return negative
    }                                   // left > 0 means incomplete
    short offs[MAXBITS+1];      // offsets in symbol table for each length
    offs[1] = 0;
    for (int len = 1; len < MAXBITS; len++)
        offs[len + 1] = offs[len] + h->count[len];
    for (int symbol = 0; symbol < n; symbol++)
        if (length[symbol] != 0)
            h->symbol[offs[length[symbol]]++] = symbol;
    return left;
}
// Decode literal/length and distance codes until an end-of-block code.
int codes(struct state *s,struct huffman *lencode,struct huffman *distcode, int *found,int *trak) {
    static const short lens[29] = { // size base for length codes 257..285
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
    static const short lext[29] = { // extra bits for length codes 257..285
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
    static const short dists[30] = { // offset base for distance codes 0..29
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
        8193, 12289, 16385, 24577};
    static const short dext[30] = { // extra bits for distance codes 0..29
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
        12, 12, 13, 13};
    int symbol;
    do {
        uintmax_t beg = s->blockin;
        symbol = decode(s, lencode);
        s->symbols++;
        if (symbol < 0)
            return symbol;              // invalid symbol
        if (symbol < 256) {             // literal: symbol is the byte
            // Write out the literal.
            if (symbol == 0x4D && trak ==0){
                *trak++;
            }
            if(symbol == 0x49 && trak == 1){
                *trak++;
            } 
            if (symbol == 0x4D && trak ==2){
                *trak++;
            }
            if(symbol == 0x45 && trak == 3){
                *found = 1;
                return IG_OK;
            } 
            s->blockout += 1;
            if (s->max < s->win)
                s->max++;
            s->litbits += s->blockin - beg;
        }
        else if (symbol > 256) {        // length
            // Get and compute length.
            if (symbol >= MAXLCODES)
                return IG_BAD_CODE_ERR;         // invalid fixed code
            symbol -= 257;
            int len = lens[symbol] + bits(s, lext[symbol]);

            // Get distance.
            symbol = decode(s, distcode);
            if (symbol < 0)
                return symbol;                  // invalid symbol
            unsigned dist = dists[symbol] + bits(s, dext[symbol]);
            if (dist > s->max) {
                s->max = MAXDIST;       // issue warning only once
            }
            // Update state for match.
            if (dist > s->blockout) {
                dist -= s->blockout;
                if (dist > s->reach)
                    s->reach = dist;
            }
            s->blockout += len;
            s->matches++;
            s->matchlen += len;
            if (s->max < s->win) {
                if (len > (int)(s->win - s->max))
                    s->max = s->win;
                else
                    s->max += len;
            }
            s->matbits += s->blockin - beg;
        }
    } while (symbol != 256);            // end of block symbol
    s->symbols--;
    // Done with a valid fixed or dynamic block.
    return IG_OK;
}

// Process a dynamic codes block.
int dynamic(struct state *s, int *found, int* trak) {
    // Get number of lengths in each table, check lengths.
    int nlen = bits(s, 5) + 257;
    int ndist = bits(s, 5) + 1;
    int ncode = bits(s, 4) + 4;
    if (nlen > MAXLCODES || ndist > MAXDCODES)
        return IG_TOO_MANY_CODES_ERR;       // bad counts

    // Read code length code lengths (really), missing lengths are zero.
    short lengths[MAXCODES];            // descriptor code lengths
    static const short order[19] =      // permutation of code length codes
        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
    int index;
    for (index = 0; index < ncode; index++) {
        int len = bits(s, 3);
        lengths[order[index]] = len;
    }
    for (; index < 19; index++)
        lengths[order[index]] = 0;
    // Build huffman table for code lengths codes (use lencode temporarily).
    short lencnt[MAXBITS+1], lensym[MAXLCODES];         // lencode memory
    struct huffman lencode = {lencnt, lensym};          // length code
    int err = construct(&lencode, lengths, 19);
    if (err < 0)
        return IG_CODE_LENGTHS_CODE_OVER_ERR;   // oversubscribed
    else if (err > 0)
        return IG_CODE_LENGTHS_CODE_UNDER_ERR;  // incomplete

    // Read length/literal and distance code length tables.
    index = 0;
    while (index < nlen + ndist) {
        int symbol = decode(s, &lencode);
        if (symbol < 0)
            return symbol;              // invalid symbol
        if (symbol < 16) {              // length in 0..15
            lengths[index++] = symbol;
        }
        else {                          // repeat instruction
            int len = -1;               // assume repeating zeros
            if (symbol == 16) {         // repeat last length 3..6 times
                if (index == 0)
                    return IG_REPEAT_NO_FIRST_ERR;  // no last length!
                len = lengths[index - 1];           // last length
                symbol = 3 + bits(s, 2);
            }
            else if (symbol == 17)      // repeat zero 3..10 times
                symbol = 3 + bits(s, 3);
            else                        // == 18, repeat zero 11..138 times
                symbol = 11 + bits(s, 7);
            if (index + symbol > nlen + ndist)
                return IG_REPEAT_TOO_MANY_ERR;  // too many lengths!
            if (len == -1)
                len = 0;
            while (symbol--)            // repeat last or zero symbol times
                lengths[index++] = len;
        }
    }
    if (lengths[256] == 0)
        return IG_NO_END_CODE_ERR;
    err = construct(&lencode, lengths, nlen);
    if (err < 0)
        return IG_LITLEN_CODE_OVER_ERR;
    else if (err > 0 && nlen - lencode.count[0] != 1)
        return IG_LITLEN_CODE_UNDER_ERR;    // incomplete with one code ok.
    short distcnt[MAXBITS+1], distsym[MAXDCODES];       // distcode memory
    struct huffman distcode = {distcnt, distsym};       // distance code
    err = construct(&distcode, lengths + nlen, ndist);
    if (err < 0)
        return IG_DIST_CODE_OVER_ERR;
    else if (err > 0 && ndist - distcode.count[0] != 1)
        return IG_DIST_CODE_UNDER_ERR;      // incomplete with one code ok
    s->headlen = s->blockin;
    return codes(s, &lencode, &distcode, found, trak);
}

int infgen(struct state *s, int *found, int * trak) {
    // Initialize input state.
    s->bitcnt = 0;
    s->bitbuf = 0;
    s->seqs = 0;
    s->max = 0;
    // Initialize statistics.
    s->blocks = 0;
    s->inbits = 0;
    s->outbytes = 0;
    s->symbnum = 0;
    s->matchnum = 0;
    s->matchtot = 0;
    s->littot = 0;

    // Return if bits() or decode() tries to read past available input.
    int err = 0;
    if (*found != 0) {  // if came back here via longjmp()
        err = IG_INCOMPLETE;     // then skip do-loop, return error
    } else {
        // Process blocks until last block or error.
        int last;
        do {
            s->reach = 0;
            s->blockin = 0;
            s->blockout = 0;
            s->symbols = 0;
            s->matches = 0;
            s->matchlen = 0;
            s->litbits = 0;
            s->matbits = 0;

            last = bits(s, 1);  // one if last block
            int type = bits(s, 2);  // block type 0..3
            printf("type is %d\n",type);
            switch (type) {
            case 0:
                err = IG_BLOCK_TYPE_ERR;
                break;
            case 1:
                err = IG_BLOCK_TYPE_ERR;
                break;
            case 2:
                err = dynamic(s,found);
                break;
            default:  // case 3
                err = IG_BLOCK_TYPE_ERR;
            }

            if (err != IG_OK) {
                break;  // return with error
            }
        } while (!last);
    }
    // Return error state.
    return err;
}


void clean(uint8_t *input, size_t length, int *found){
struct state s;
    s.info = 0;
    s.data = 1;
    s.tree = 1;
    s.draw = 0;
    s.win = MAXDIST;
    s.in_pos = 0;

    // Set input to the provided byte array
    s.in = input;
    s.in_size = length; // Assuming you have a way to track the length of the input array

    // Process the deflate data
    int ret =0;
    do {
        // Read bytes and process them as deflate data
        int ch = get(&s);
        //printf("%d\n",*found);
        if (ch == -1) {
            ret = -1;
            break; // End of input
        }
        // Handle deflate data processing
        ret = infgen(&s,found);
        printf("%d\n",ret);
        if (ret < 0) {
            break;
        }
    } while (ret > 0);

    // Finalize processing
    if (ret == 0) {
    }
    
}


int main(int argc, char **argv){
    int     cryptlength, plainlength;
    time_t      now;
    int     i,x,y,z, offset=12;
    char        *cryptname=NULL, *plainname=NULL;
    size_t start,port;
    uint16_t p;
    uint8_t ch;
    for( i = 1; i < argc; i++ )
    {
        if( !strcmp( "-C", argv[i] ) )
        { /* Check byte */
            ch = atoi( argv[++i] );
        }
        if( !strcmp( "-o", argv[i] ) )
        { /* offset */
            offset += atoi( argv[++i] );
        }
        else if( !strcmp( "-c", argv[i] ) )
        { /* ciphertext filename */
            cryptname = argv[++i];
        }
        else if( !strcmp( "-p", argv[i] ) )
        { /* plaintext filename */
            plainname = argv[++i];
        }
    }
    if( !cryptname || !plainname )
    {
        printf("Please pass ciphertext with -c first then plaintext with -p\n");
        exit(1);
    }

    if(cryptname )
    {
        cryptlength = get_size(cryptname);
        ciphertext = malloc( cryptlength );
    }
    if(plainname )
    {
        plainlength = get_size(plainname);
        plaintext = malloc( plainlength + offset );
    }
    
    if( plainlength > cryptlength-offset )
    {
        x =readdata(plainname,plaintext,plainlength);
        int frt =0;
        printf("frt is %d\n",frt);
        clean(plaintext,plainlength,&frt);
        if(frt == 1){
            printf("its working correctly\n");
            exit(0);
        }
        fprintf( stderr, "Warning! Plaintext is longer than Ciphertext!\n" );
    }
    if( plainlength < 1 )
    {
        fprintf( stderr, "Plaintext must be at least 3 bytes! Aborting.\n" );
        exit(1);
    }

    //cryptlength = plainlength + offset;
    if( !ciphertext || !plaintext )
    {
        fprintf( stderr, "Not enough memory!\n" );
        exit(1);
    }
    x=0;
    x = readdata(cryptname,ciphertext,cryptlength);
    if(x != cryptlength){
        printf("Couldn't read ciphertext.. exiting\n");
        exit(1);
    }
    key2i = malloc( sizeof(uint32_t)*KEY2SPACE );
    x =0;
    x =readdata(plainname,&plaintext[offset],plainlength);
    if(x != plainlength){
        printf("Couldn't read plaintext.. exiting\n");
        exit(1);
    }
    fprintf( stderr, "Finished on %s", ctime(&now) );
    exit(0);    
}

Now, I tried all I know but decompression is hard to understand, so I do not understand why is this happening.

And the base64 encoded stream:

HMK9w4cSwqRIF8KkwrsvwrN6wofDnjMzw6gEw74dWmvDjQ5IIMORWj7DvcKlw67CohdlXRUZRMKcw6PDvjkJwpHCusKsw7PDvzcowpbCtR7Ch8O/w70Hw78/w6jDrx8uw53CisO/w73Dp8O9w7bDv8OzH8KMw73Cp8Kkw4N/CARDw7/DgcOQw79ww7h/w7jDp8K/w78Lw6HDkMO7w5fChGXDrMO/w7fDn8OfP25EVMOMwrgvfhV4wpLCnUzDn8Ocw4zCvsKAwqPDp3tO

Solution

  • This line:

    `return left;`
    

    executes in the construct function. meaning the given HDIST or HLEN is incorrect for the given stream hence the error LENGTH CODE OVER. In deflate compressed stream the first 2 bytes contains 16 bits out of 17 bits block header so in the above given code the problem is

    int ch = get(&s);

    which extracts the byte from the stream and resets the cursor or put those bits into the buffer then calls

    ret = infgen(&s,found);
    

    which will not start processing this stream from second byte.

    removing this:

    int ch = get(&s);
    //printf("%d\n",*found);
    if (ch == -1) {
       ret = -1;
       break; // End of input
       }
    

    solved the problem and gave me chance to improve the code to just process only one block.