I have reduced the strange issue in this code to the minimum. This program writes 128,000 times the bytes for (int)90000 into a file and then tries to read it back in.
set zipped=false and everything works like a charm set zipped=true and everything works like a charm until the 496th chunk of 1024 bytes. At that point a single byte is lost and everything is shifted to the left by one byte (see output)
...
0 1 95 -112- which is byte code for int 90,000
Counters: 496 126937
1 95 -112 0- which is byte code for int 23,040,000
...
this is the code i came up with. I just can't figure out why it suddenly breaks in the middle of doing the same thing over and over. Any help/insights/explainers much appreciated.
public class TestApp7 {
static final boolean zipped = true;
static File theFile = null;
private static void writeZipData() throws Exception {
FileOutputStream fos = new FileOutputStream(theFile);
BufferedOutputStream bos = null;
if (zipped) {
GZIPOutputStream gzout = new GZIPOutputStream(fos);
bos = new BufferedOutputStream(gzout);
} else
bos = new BufferedOutputStream(fos);
byte[] bs9 = RHUtilities.toByteArray((int)90000);
for (int i=0; i<128000; i++)
bos.write(bs9);
bos.flush();
bos.close();
}
private static void readZipData() throws Exception {
byte[] buf = new byte[1024];
int chunkCounter = 0;
int intCounter = 0;
FileInputStream fin = new FileInputStream(theFile);
int rdLen = 0;
if (zipped) {
GZIPInputStream gin = new GZIPInputStream(fin);
while ((rdLen = gin.read(buf)) != -1) {
System.out.println("Counters: " + chunkCounter + " " + intCounter);
for (int i=0; i<rdLen/4; i++) {
byte[] bs = Arrays.copyOfRange(buf,(i*4),((i+1)*4));
intCounter++;
System.out.print(bs[0] + " " + bs[1] + " " + bs[2] + " " + bs[3]);
}
chunkCounter++;
}
gin.close();
} else {
while ((rdLen = fin.read(buf)) != -1) {
System.out.println("Counters: " + chunkCounter + " " + intCounter);
for (int i=0; i<rdLen/4; i++) {
byte[] bs = Arrays.copyOfRange(buf,(i*4),((i+1)*4));
intCounter++;
System.out.print(bs[0] + " " + bs[1] + " " + bs[2] + " " + bs[3]);
}
chunkCounter++;
}
}
fin.close();
}
public static void main(String args[]) {
try {
if (zipped)
theFile = new File("Test.gz");
else
theFile = new File("Test.dat");
writeZipData();
readZipData();
} catch (Throwable e) { e.printStackTrace(); }
}
}
So based on Jon's wonderful comments ... you cannot rely on .read(buffer) filling the buffer even when there are more bytes in the stream - it stops at the boundary where the BufferedOutputStream-wrapped GZIPOutputStream saved a chunk of data. just add another read to go beyond the boundary and complete the chunk
while ((rdLen = gin.read(buf)) != -1) {
if (rdLen<chunksize) {
byte[] missBytes = new byte[chunksize-rdLen];
int rdLine_miss = 0;
if ((rdLine_miss = gin.read(missBytes)) > 0)
System.arraycopy(missBytes,0,buf,rdLen,rdLine_miss);
rdLen += rdLine_miss;
}
for (int i=0; i<rdLen/4; i++) {
byte[] bs = Arrays.copyOfRange(buf,(i*4),((i+1)*4));
intCounter++;
System.out.println(bs[0] + " " + bs[1] + " " + bs[2] + " " + bs[3] + " ");
}
chunkCounter++;
}