(EDITED)
I have a particle simulator in C where I generate files with the particle states per integration step containing time, position, and velocity components. The code works well, but the resulting text files can easily go to gigabytes of data, with billions of lines. My initial idea is trying to save those files in binary instead in order to save space. I'm also using GSL for the ODEs.
If so, I'm trying to save to binary file with fwrite
. The following is a minimal working example:
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_odeiv2.h>
int uniformFieldODE(double t, const double s[], double f[], void *params);
int main() {
int noPrimaries = 1;
double q = 1.0;
double m = 1.0;
double B[3] = { 0.0, 0.0, 1.0 };
double E[3] = { 0.1, 0.0, 0.1 };
double x0[3]; // = {0, 0, 0}; // Initial position
double v0[3]; // = {1, 1, 1}; // Initial velocity
double t0 = 0.0;
double tf = 5000.0;
double dt = 0.05;
double h = 1.0e-06;
double epsAbs = 1.0e-08;
double epsRel = 1.0e-10;
char *file = "lorentz";
const gsl_rng_type *T = gsl_rng_ranlxs0;
gsl_rng *r = gsl_rng_alloc(T);
gsl_rng_set(r, (unsigned long)time(NULL));
for (int i = 0; i < noPrimaries; i++) {
x0[0] = gsl_rng_uniform(r);
x0[1] = gsl_rng_uniform(r);
x0[2] = gsl_rng_uniform(r);
v0[0] = gsl_rng_uniform(r);
v0[1] = gsl_rng_uniform(r);
v0[2] = gsl_rng_uniform(r);
char fileNumber[i+1];
char fileName[i+20];
char extension[8] = ".bin";
sprintf(fileNumber, "%d", i+1);
strcpy(fileName, file);
strcat(fileName, fileNumber);
strcat(fileName, extension);
// Initial conditions & parameters
const int dimension = 6; // number of differential equations
double s[dimension]; // Initial state vector
int status; // status of driver function
double paramsB[8];
paramsB[0] = q;
paramsB[1] = m;
paramsB[2] = B[0];
paramsB[3] = B[1];
paramsB[4] = B[2];
paramsB[5] = E[0];
paramsB[6] = E[1];
paramsB[7] = E[2];
s[0] = x0[0]; s[1] = x0[1]; s[2] = x0[2];
s[3] = v0[0]; s[4] = v0[1]; s[5] = v0[2];
// File creation for state storing
FILE *data = fopen(fileName, "wb");
// Integrator configuration
double t, t_next;
gsl_odeiv2_system odeSystem;
odeSystem.function = uniformFieldODE;
odeSystem.dimension = dimension;
odeSystem.params = paramsB;
t = t0;
gsl_odeiv2_driver *drv;
drv = gsl_odeiv2_driver_alloc_y_new(&odeSystem, gsl_odeiv2_step_rk8pd, h, epsAbs, epsRel);
for (t_next = t0 + dt; t_next <= tf; t_next += dt) {
status = gsl_odeiv2_driver_apply(drv, &t, t_next, s);
if (status != GSL_SUCCESS) {
printf("Error: status = %d\n", status);
break;
}
//fprintf(data, "%.5e %.5e %.5e %.5e %.5e %.5e %.5e\n", t, s[0], s[1], s[2], s[3], s[4], s[5]);
char str[100];
sprintf(str, "%.5e %.5e %.5e %.5e %.5e %.5e %.5e\n", t, s[0], s[1], s[2], s[3], s[4], s[5]);
fwrite(&str, sizeof(str), 1, data);
}
gsl_odeiv2_driver_free(drv);
fclose(data);
}
return 0;
}
int uniformFieldODE(double t, const double s[], double f[], void *params){
(void)(t); /* avoid unused parameter warning */
double *lparams = (double *)params;
double q = lparams[0];
double m = lparams[1];
double mu = q / m;
double Bx = lparams[2];
double By = lparams[3];
double Bz = lparams[4];
double Ex = lparams[5];
double Ey = lparams[6];
double Ez = lparams[7];
f[0] = s[3];
f[1] = s[4];
f[2] = s[5];
f[3] = mu * (Bz*s[4] - By*s[5] + Ex);
f[4] = mu * (Bx*s[5] - Bz*s[3] + Ey);
f[5] = mu * (By*s[3] - Bx*s[4] + Ez);
return GSL_SUCCESS;
}
The problem is that the output still comes out as text, even if it has .bin as extension. Some unreadable characters show up in the beginning of the strings, but otherwise the output is the same as of a text file:
1.00000e-04 7.14907e+08 7.14919e+08 2.14475e+08 -1.30229e+08 -9.46995e+06 -9.50139e+06
5せ2.00000e-04 7.14894e+08 7.14918e+08 2.14474e+08 -1.30245e+08 -9.31686e+06 -9.43359e+06
I'm sure that there's a simple way of doing this, but I cannot figure it out. I'll really appreciate any inputs.
From my original post I got feedbacks that binary might not be the way to go to preserve all the data. So my goal here is to find an output format with a small size when compared to pure text files.
The files will later be used for data analysis via e.g. Python with numpy and/or pandas.
Brad Lanam's answer mentions using libz
to compress the output. Unfortunately it does not show an example.
If your program would write text output to stdout
, not to a file, like
printf(str, "%.5e %.5e %.5e %.5e %.5e %.5e %.5e\n", t, s[0], s[1], s[2], s[3], s[4], s[5]);
you could use a pipe like
yourprogram | gzip > output.txt.gz
In Python you can probably use classes gzip
or libz
to process a compressed input stream or you can use gzip
in a pipe again like
gzip -dc output.txt.gz | python_program