I am working on an improved program that attempts to recover PKZIP keys with intention of CUDA support, So I integrated a decompression algorithm from infgen to decompress an array of bytes instead of file stream and look for specific byte sequence, in this case
MIME
, if those are found then it means the keys are correct.
This program works under the assumption that:
So for this test I am just reading the file with first 128 bytes without zip headers then try to decompress it and see if it works but unfortunately the Dynamic function returns -4
for some reason I do not understand but if I run ./infgen -dd file
with the same file I get to the target bytes
Here is my code:
#define IG_INCOMPLETE 1
#define IG_OK 0
#define IG_BLOCK_TYPE_ERR -1
#define IG_STORED_LENGTH_ERR -2
#define IG_TOO_MANY_CODES_ERR -3
#define IG_CODE_LENGTHS_CODE_OVER_ERR -4
#define IG_CODE_LENGTHS_CODE_UNDER_ERR -5
#define IG_REPEAT_NO_FIRST_ERR -6
#define IG_REPEAT_TOO_MANY_ERR -7
#define IG_LITLEN_CODE_OVER_ERR -8
#define IG_LITLEN_CODE_UNDER_ERR -9
#define IG_DIST_CODE_OVER_ERR -10
#define IG_DIST_CODE_UNDER_ERR -11
#define IG_NO_END_CODE_ERR -12
#define IG_BAD_CODE_ERR -13
struct state {
int info;
int data;
int tree;
int draw;
unsigned max;
unsigned win;
// Input data buffer.
const uint8_t *in; // Input data array
size_t in_size; // Size of the input data array
size_t in_pos; // Current read position in input data array
// Bit and chunk processing.
int bitcnt;
int bitbuf;
int seqs;
short seq[MAXSEQS];
char len[MAXSEQS];
// Block and total statistics.
unsigned reach;
unsigned headlen;
uintmax_t blockin;
uintmax_t blockout;
uintmax_t symbols;
uintmax_t matches;
uintmax_t matchlen;
uintmax_t litbits;
uintmax_t matbits;
uintmax_t blocks;
uintmax_t inbits;
uintmax_t outbytes;
uintmax_t symbnum;
uintmax_t matchnum;
uintmax_t matchtot;
uintmax_t littot;
};
int get(struct state *s) {
if (s->in_pos >= s->in_size) return -1; // End of input array
return s->in[s->in_pos++]; // Return next byte and increment position
}
int bits(struct state *s, int need) {
long val = s->bitbuf;
while (s->bitcnt < need) {
int next = get(s);
if (next == -1)
return -1; // out of input
val |= (long)(next) << s->bitcnt; // load eight bits
s->bitcnt += 8;
}
s->bitbuf = (int)(val >> need);
s->bitcnt -= need;
s->blockin += need;
val &= (1L << need) - 1;
if (s->draw > 1 && s->seqs < MAXSEQS) {
s->seq[s->seqs] = val;
s->len[s->seqs] = need;
s->seqs++;
}
return (int)val;
}
// Show and accumulate statistics at end of block.
void end(struct state *s) {
s->blocks++;
s->inbits += s->blockin;
s->outbytes += s->blockout;
s->symbnum += s->symbols;
}
struct huffman {
short *count; // number of symbols of each length
short *symbol; // canonically ordered symbols
};
int decode(struct state *s, struct huffman *h) {
int bitbuf = s->bitbuf; // bits to decode from the input
int left = s->bitcnt; // number of bits in bitbuf
int len = 1; // length of code in consideration
int code = 0; // len bits pulled from bitbuf
int first = 0; // first code of length len
int index = 0; // index of that code in the symbol table
short *next = h->count + 1; // pointer to number of codes of next length
for (;;) {
while (left--) {
code |= bitbuf & 1;
bitbuf >>= 1;
int count = *next++;
if (code < first + count) {
if (s->draw > 1 && s->seqs < MAXSEQS) {
int rev = 0;
for (int i = 0; i < len; i++)
rev = (rev << 1) | ((code >> i) & 1);
s->seq[s->seqs] = rev;
s->len[s->seqs] = len;
s->seqs++;
}
s->bitbuf = bitbuf;
s->bitcnt = (s->bitcnt - len) & 7;
s->blockin += len;
return h->symbol[index + (code - first)];
}
index += count;
first += count;
first <<= 1;
code <<= 1;
len++;
}
left = (MAXBITS+1) - len;
if (left == 0)
break;
bitbuf = get(s);
if (bitbuf == -1)
return -1; // out of input
if (left > 8)
left = 8;
}
return IG_BAD_CODE_ERR; // ran out of codes
}
int construct(struct huffman *h, short *length, int n) {
// Count the number of codes of each length.
for (int len = 0; len <= MAXBITS; len++)
h->count[len] = 0;
for (int symbol = 0; symbol < n; symbol++)
(h->count[length[symbol]])++; // assumes lengths are within bounds
if (h->count[0] == n) // no codes!
return 0; // complete, but decode() will fail
// Check for an over-subscribed or incomplete set of lengths.
int left = 1; // one possible code of zero length
for (int len = 1; len <= MAXBITS; len++) {
left <<= 1; // one more bit, double codes left
left -= h->count[len]; // deduct count from possible codes
if (left < 0)
return left; // over-subscribed--return negative
} // left > 0 means incomplete
short offs[MAXBITS+1]; // offsets in symbol table for each length
offs[1] = 0;
for (int len = 1; len < MAXBITS; len++)
offs[len + 1] = offs[len] + h->count[len];
for (int symbol = 0; symbol < n; symbol++)
if (length[symbol] != 0)
h->symbol[offs[length[symbol]]++] = symbol;
return left;
}
// Decode literal/length and distance codes until an end-of-block code.
int codes(struct state *s,struct huffman *lencode,struct huffman *distcode, int *found,int *trak) {
static const short lens[29] = { // size base for length codes 257..285
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
static const short lext[29] = { // extra bits for length codes 257..285
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
static const short dists[30] = { // offset base for distance codes 0..29
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
8193, 12289, 16385, 24577};
static const short dext[30] = { // extra bits for distance codes 0..29
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13};
int symbol;
do {
uintmax_t beg = s->blockin;
symbol = decode(s, lencode);
s->symbols++;
if (symbol < 0)
return symbol; // invalid symbol
if (symbol < 256) { // literal: symbol is the byte
// Write out the literal.
if (symbol == 0x4D && trak ==0){
*trak++;
}
if(symbol == 0x49 && trak == 1){
*trak++;
}
if (symbol == 0x4D && trak ==2){
*trak++;
}
if(symbol == 0x45 && trak == 3){
*found = 1;
return IG_OK;
}
s->blockout += 1;
if (s->max < s->win)
s->max++;
s->litbits += s->blockin - beg;
}
else if (symbol > 256) { // length
// Get and compute length.
if (symbol >= MAXLCODES)
return IG_BAD_CODE_ERR; // invalid fixed code
symbol -= 257;
int len = lens[symbol] + bits(s, lext[symbol]);
// Get distance.
symbol = decode(s, distcode);
if (symbol < 0)
return symbol; // invalid symbol
unsigned dist = dists[symbol] + bits(s, dext[symbol]);
if (dist > s->max) {
s->max = MAXDIST; // issue warning only once
}
// Update state for match.
if (dist > s->blockout) {
dist -= s->blockout;
if (dist > s->reach)
s->reach = dist;
}
s->blockout += len;
s->matches++;
s->matchlen += len;
if (s->max < s->win) {
if (len > (int)(s->win - s->max))
s->max = s->win;
else
s->max += len;
}
s->matbits += s->blockin - beg;
}
} while (symbol != 256); // end of block symbol
s->symbols--;
// Done with a valid fixed or dynamic block.
return IG_OK;
}
// Process a dynamic codes block.
int dynamic(struct state *s, int *found, int* trak) {
// Get number of lengths in each table, check lengths.
int nlen = bits(s, 5) + 257;
int ndist = bits(s, 5) + 1;
int ncode = bits(s, 4) + 4;
if (nlen > MAXLCODES || ndist > MAXDCODES)
return IG_TOO_MANY_CODES_ERR; // bad counts
// Read code length code lengths (really), missing lengths are zero.
short lengths[MAXCODES]; // descriptor code lengths
static const short order[19] = // permutation of code length codes
{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
int index;
for (index = 0; index < ncode; index++) {
int len = bits(s, 3);
lengths[order[index]] = len;
}
for (; index < 19; index++)
lengths[order[index]] = 0;
// Build huffman table for code lengths codes (use lencode temporarily).
short lencnt[MAXBITS+1], lensym[MAXLCODES]; // lencode memory
struct huffman lencode = {lencnt, lensym}; // length code
int err = construct(&lencode, lengths, 19);
if (err < 0)
return IG_CODE_LENGTHS_CODE_OVER_ERR; // oversubscribed
else if (err > 0)
return IG_CODE_LENGTHS_CODE_UNDER_ERR; // incomplete
// Read length/literal and distance code length tables.
index = 0;
while (index < nlen + ndist) {
int symbol = decode(s, &lencode);
if (symbol < 0)
return symbol; // invalid symbol
if (symbol < 16) { // length in 0..15
lengths[index++] = symbol;
}
else { // repeat instruction
int len = -1; // assume repeating zeros
if (symbol == 16) { // repeat last length 3..6 times
if (index == 0)
return IG_REPEAT_NO_FIRST_ERR; // no last length!
len = lengths[index - 1]; // last length
symbol = 3 + bits(s, 2);
}
else if (symbol == 17) // repeat zero 3..10 times
symbol = 3 + bits(s, 3);
else // == 18, repeat zero 11..138 times
symbol = 11 + bits(s, 7);
if (index + symbol > nlen + ndist)
return IG_REPEAT_TOO_MANY_ERR; // too many lengths!
if (len == -1)
len = 0;
while (symbol--) // repeat last or zero symbol times
lengths[index++] = len;
}
}
if (lengths[256] == 0)
return IG_NO_END_CODE_ERR;
err = construct(&lencode, lengths, nlen);
if (err < 0)
return IG_LITLEN_CODE_OVER_ERR;
else if (err > 0 && nlen - lencode.count[0] != 1)
return IG_LITLEN_CODE_UNDER_ERR; // incomplete with one code ok.
short distcnt[MAXBITS+1], distsym[MAXDCODES]; // distcode memory
struct huffman distcode = {distcnt, distsym}; // distance code
err = construct(&distcode, lengths + nlen, ndist);
if (err < 0)
return IG_DIST_CODE_OVER_ERR;
else if (err > 0 && ndist - distcode.count[0] != 1)
return IG_DIST_CODE_UNDER_ERR; // incomplete with one code ok
s->headlen = s->blockin;
return codes(s, &lencode, &distcode, found, trak);
}
int infgen(struct state *s, int *found, int * trak) {
// Initialize input state.
s->bitcnt = 0;
s->bitbuf = 0;
s->seqs = 0;
s->max = 0;
// Initialize statistics.
s->blocks = 0;
s->inbits = 0;
s->outbytes = 0;
s->symbnum = 0;
s->matchnum = 0;
s->matchtot = 0;
s->littot = 0;
// Return if bits() or decode() tries to read past available input.
int err = 0;
if (*found != 0) { // if came back here via longjmp()
err = IG_INCOMPLETE; // then skip do-loop, return error
} else {
// Process blocks until last block or error.
int last;
do {
s->reach = 0;
s->blockin = 0;
s->blockout = 0;
s->symbols = 0;
s->matches = 0;
s->matchlen = 0;
s->litbits = 0;
s->matbits = 0;
last = bits(s, 1); // one if last block
int type = bits(s, 2); // block type 0..3
printf("type is %d\n",type);
switch (type) {
case 0:
err = IG_BLOCK_TYPE_ERR;
break;
case 1:
err = IG_BLOCK_TYPE_ERR;
break;
case 2:
err = dynamic(s,found);
break;
default: // case 3
err = IG_BLOCK_TYPE_ERR;
}
if (err != IG_OK) {
break; // return with error
}
} while (!last);
}
// Return error state.
return err;
}
void clean(uint8_t *input, size_t length, int *found){
struct state s;
s.info = 0;
s.data = 1;
s.tree = 1;
s.draw = 0;
s.win = MAXDIST;
s.in_pos = 0;
// Set input to the provided byte array
s.in = input;
s.in_size = length; // Assuming you have a way to track the length of the input array
// Process the deflate data
int ret =0;
do {
// Read bytes and process them as deflate data
int ch = get(&s);
//printf("%d\n",*found);
if (ch == -1) {
ret = -1;
break; // End of input
}
// Handle deflate data processing
ret = infgen(&s,found);
printf("%d\n",ret);
if (ret < 0) {
break;
}
} while (ret > 0);
// Finalize processing
if (ret == 0) {
}
}
int main(int argc, char **argv){
int cryptlength, plainlength;
time_t now;
int i,x,y,z, offset=12;
char *cryptname=NULL, *plainname=NULL;
size_t start,port;
uint16_t p;
uint8_t ch;
for( i = 1; i < argc; i++ )
{
if( !strcmp( "-C", argv[i] ) )
{ /* Check byte */
ch = atoi( argv[++i] );
}
if( !strcmp( "-o", argv[i] ) )
{ /* offset */
offset += atoi( argv[++i] );
}
else if( !strcmp( "-c", argv[i] ) )
{ /* ciphertext filename */
cryptname = argv[++i];
}
else if( !strcmp( "-p", argv[i] ) )
{ /* plaintext filename */
plainname = argv[++i];
}
}
if( !cryptname || !plainname )
{
printf("Please pass ciphertext with -c first then plaintext with -p\n");
exit(1);
}
if(cryptname )
{
cryptlength = get_size(cryptname);
ciphertext = malloc( cryptlength );
}
if(plainname )
{
plainlength = get_size(plainname);
plaintext = malloc( plainlength + offset );
}
if( plainlength > cryptlength-offset )
{
x =readdata(plainname,plaintext,plainlength);
int frt =0;
printf("frt is %d\n",frt);
clean(plaintext,plainlength,&frt);
if(frt == 1){
printf("its working correctly\n");
exit(0);
}
fprintf( stderr, "Warning! Plaintext is longer than Ciphertext!\n" );
}
if( plainlength < 1 )
{
fprintf( stderr, "Plaintext must be at least 3 bytes! Aborting.\n" );
exit(1);
}
//cryptlength = plainlength + offset;
if( !ciphertext || !plaintext )
{
fprintf( stderr, "Not enough memory!\n" );
exit(1);
}
x=0;
x = readdata(cryptname,ciphertext,cryptlength);
if(x != cryptlength){
printf("Couldn't read ciphertext.. exiting\n");
exit(1);
}
key2i = malloc( sizeof(uint32_t)*KEY2SPACE );
x =0;
x =readdata(plainname,&plaintext[offset],plainlength);
if(x != plainlength){
printf("Couldn't read plaintext.. exiting\n");
exit(1);
}
fprintf( stderr, "Finished on %s", ctime(&now) );
exit(0);
}
Now, I tried all I know but decompression is hard to understand, so I do not understand why is this happening.
And the base64 encoded stream:
HMK9w4cSwqRIF8KkwrsvwrN6wofDnjMzw6gEw74dWmvDjQ5IIMORWj7DvcKlw67CohdlXRUZRMKcw6PDvjkJwpHCusKsw7PDvzcowpbCtR7Ch8O/w70Hw78/w6jDrx8uw53CisO/w73Dp8O9w7bDv8OzH8KMw73Cp8Kkw4N/CARDw7/DgcOQw79ww7h/w7jDp8K/w78Lw6HDkMO7w5fChGXDrMO/w7fDn8OfP25EVMOMwrgvfhV4wpLCnUzDn8Ocw4zCvsKAwqPDp3tO
This line:
`return left;`
executes in the construct function. meaning the given HDIST
or HLEN
is incorrect for the given stream hence the error LENGTH CODE OVER
.
In deflate compressed stream the first 2 bytes contains 16 bits out of 17 bits block header so in the above given code the problem is
int ch = get(&s);
which extracts the byte from the stream and resets the cursor or put those bits into the buffer then calls
ret = infgen(&s,found);
which will not start processing this stream from second byte.
removing this:
int ch = get(&s);
//printf("%d\n",*found);
if (ch == -1) {
ret = -1;
break; // End of input
}
solved the problem and gave me chance to improve the code to just process only one block.