I need to write a large number of vertices to a text file in a specific format (.obj wavefront). So I was testing out approaches.
import numpy as np
def write_test(vertices, file_path, overwrite=True):
"""loop through each vertex, format and write"""
if overwrite:
with open(file_path, 'w') as obj_file:
obj_file.write('')
with open(file_path, 'a') as test_file:
for v in vertices:
test_file.write('v %s %s %s\n' % (v[0], v[1], v[2]))
def write_test2(vertices, file_path, overwrite=True):
"""use np.savetxt"""
if overwrite:
with open(file_path, 'w') as obj_file:
obj_file.write('')
with open(file_path, 'a') as test_file:
np.savetxt(test_file, vertices, 'v %s %s %s\n', delimiter='', newline='')
def write_test3(vertices, file_path, overwrite=True):
"""avoid writing in a loop by creating a template for the entire array, and format at once"""
if overwrite:
with open(file_path, 'w') as obj_file:
obj_file.write('')
with open(file_path, 'a') as test_file:
temp = 'v %s %s %s\n' * len(vertices)
test_file.write(temp % tuple(vertices.ravel()))
def write_test4(vertices, file_path, overwrite=True):
"""write only once, use join to concatenate string in memory"""
if overwrite:
with open(file_path, 'w') as obj_file:
obj_file.write('')
with open(file_path, 'a') as test_file:
test_file.write('v ' + '\nv '.join(' '.join(map(str, v)) for v in vertices))
As it turns out, to my surprise write_test
is faster then write_test2
, with write_test3
being the fastest one
In [2]: a=np.random.normal(0, 1, (1234567, 3))
In [3]: %timeit write_test(a, 'test.obj')
2.6 s ± 94.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [4]: %timeit write_test2(a, 'test.obj')
3.6 s ± 30 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [5]: %timeit write_test3(a, 'test.obj')
2.23 s ± 7.29 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [6]: %timeit write_test4(a, 'test.obj')
3.49 s ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Currently, writing to text file is the bottle neck in my vectorized code.
Looking at the np.savetxt
code as rchome suggested savetxt
seems to be doing a lot of generalized formatting work, and is probably looping in python anyway, so no wonder it is slower then the simple python loop in write_test
.
So my question now is that is there any faster way to accomplish this?
I finally ended up writing a C extension, since it did not seem like there was any way to squeeze more performance out of python/numpy implementation.
First I was using sprintf
for formatting and got these results -
In [7]: a = np.random.normal(0, 1, (1234567, 3))
In [8]: %timeit ObjWrite.write(a, 'a.txt')
1.21 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Compared to ~2.5 seconds this is a bit of an improvement, but not nearly enough to justify writing the extension
Since almost all of the time was being spent on formatting the string, I wrote a sprintf
replacement just for formatting doubles (accurate to 15-17th decimal place for values b/w -10^7
and 10^7
, which is acceptable for my use case)
In [9]: %timeit ObjWrite.writeFast(a, 'a-fast.txt')
302 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
~300ms - Decent!
Here is the module -
ObjWrite.c
#include <stdio.h>
#include <Python.h>
#include <numpy/arrayobject.h>
#define CHUNK_SIZE 32768
/*
Write vertices to given file, use sprintf for formatting
python-interface: ObjWrite.write(arr: ndarray, filepath: string)
*/
static PyObject* methodWriteIter(PyObject *self, PyObject *args) {
// Parse arguments
PyArrayObject *arr;
char *filepath = NULL;
if (!PyArg_ParseTuple(args, "O!s", &PyArray_Type, &arr, &filepath)) return PyLong_FromLong(-1);
npy_intp size = PyArray_SIZE(arr);
// Handle zero-sized arrays specially, if size is not a multiple of 3, exit
if (size == 0 || size % 3 != 0) return PyLong_FromLong(-1);
// get iterator
NpyIter* iter;
NpyIter_IterNextFunc *iternext;
PyArray_Descr *dtype;
dtype = PyArray_DescrFromType(NPY_DOUBLE);
iter = NpyIter_New(arr, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_NO_CASTING, dtype);
if (iter == NULL) return PyLong_FromLong(-1);
// get iternext function for fast access
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
NpyIter_Deallocate(iter);
return PyLong_FromLong(-1);
}
// get data pointer, this will get updated by the iterator
double **dataptr;
dataptr = (double **) NpyIter_GetDataPtrArray(iter);
// open file, exit if null
FILE *fp = fopen(filepath, "w");
if (fp == NULL) {
NpyIter_Deallocate(iter);
return PyLong_FromLong(-1);
}
// init file buffer, writing in chunks does not seem to offer any significant benefit
// but it should still will be useful when disk utilization is high
char fileBuffer[CHUNK_SIZE + 128];
int bufferCount = 0;
double x, y, z;
do {
// get 3 doubles from array
x = **dataptr;
iternext(iter);
y = **dataptr;
iternext(iter);
z = **dataptr;
// use sprintf to format and write to buffer
bufferCount += sprintf(&fileBuffer[bufferCount], "v %.17f %.17f %.17f\n", x, y, z);
// if the chunk is big enough, write it.
if (bufferCount >= CHUNK_SIZE) {
fwrite(fileBuffer, bufferCount, 1, fp);
bufferCount = 0;
}
} while (iternext(iter));
// write remainder
if (bufferCount > 0) fwrite(fileBuffer, 1, bufferCount, fp);
// clean-up and exit with success
NpyIter_Deallocate(iter);
fclose(fp);
return PyLong_FromLong(0);
}
/*
Turns out that maximum proportion of time is taken by sprintf call in the above implementation
So, the next part is basically implementing a faster way to format doubles
*/
static const char DIGITS[] = "0123456789"; // digit-char lookup table
/* get powers of 10, can overflow but we only need this for digits <= 9 */
int powOf10(int digits) {
int res = 1;
while (digits > 0) {
res *= 10;
digits--;
}
return res;
}
/* a fast way to get number of digits in a positive integer */
int countDigitsPosInt(int n) {
if (n < 100000) { // 5 or less
if (n < 100) { // 1 or 2
if (n < 10) { return 1; } else { return 2; }
} else { // 3 or 4 or 5
if (n < 1000) { return 3; }
else { // 4 or 5
if (n < 10000) { return 4; } else { return 5; }
}
}
} else { // 6 or more
if (n < 10000000) { // 6 or 7
if (n < 1000000) { return 6; } else { return 7; }
} else { // 8 to 10
if (n < 100000000) { return 8; }
else { // 9 or 10
if (n < 1000000000) { return 9; } else { return 10; }
}
}
}
}
/* format positive integers into `digits` length strings, zero-pad if number of digits too high
if number digits are greater then `digits`, it will get truncated, so watch out */
int posIntToStringDigs(char *s, int n, int digits) {
int q = n;
int r;
int i = digits - 1;
while (i >= 0 && q > 0) { // assign digits from last to first
r = q % 10;
q = q / 10;
*(s + i) = DIGITS[r]; // get char from lookup table
i--;
}
while (i >= 0) { // we are here because q=0 and still some digits remain
*(s + i) = '0'; // 0 pad these
i--;
}
return digits;
}
/* format positive integers - no zero padding */
int posIntToString(char *s, int n) {
if (n == 0) { // handle 0 case, no need of counting digits in this case
*s = '0';
return 1;
}
// call posIntToStringDigs with exactly the number of digits as in the integer
return posIntToStringDigs(s, n, countDigitsPosInt(n));
}
static const int MAX_D = 8; // max number of digits we'll break things into
static const int _10D = 100000000; // 10 ^ MAX_D
/*
format positive doubles
accurate to 15-17th digit for numbers that are not huge (< 10^7), fairly accurate for huge numbers
I personally do not need this to be crazy accurate, for the range of numbers I am expecting, this will do just fine
*/
int posDoubleToString(char *s, double f, int precision) {
// length of the generated string
int len = 0;
// to make big numbers int friendly, divide by 10 ^ MAX_D until the whole part would fit in an int
int steps = 0;
while (f > _10D) {
f /= _10D;
steps++;
}
int intPart = (int) f;
double decPart = f - intPart;
// add the first whole part to the string, we have no idea how many digits would be there
len += posIntToString(&s[len], intPart);
// if the number was bigger then 10 ^ MAX_D, we need to return it to its former glory, i.e. add rest to integer string
while (steps > 0) {
decPart = decPart * _10D;
intPart = (int) decPart;
decPart = decPart - intPart;
len += posIntToStringDigs(&s[len], intPart, MAX_D); // appending 0's important here
steps--;
}
// add the decimal point
s[len++] = '.';
// after the decimal, piggy back int-to-string function to `precision` number of digits
while (precision > 0) {
if (precision > MAX_D) {
decPart = decPart * _10D;
intPart = (int) decPart;
decPart = decPart - intPart;
len += posIntToStringDigs(&s[len], intPart, MAX_D);
precision -= MAX_D;
} else {
decPart = decPart * powOf10(precision);
intPart = (int) decPart;
decPart = decPart - intPart;
if (decPart > 0.5) intPart += 1; // round of
len += posIntToStringDigs(&s[len], intPart, precision);
precision = 0;
}
}
// truncate following zeros, loop on string in reverse
/* commented to mimic sprintf
int index = len - 1;
while (index > 0) {
if (s[index] != '0') break; // if last char is not 0 our work is done, nothing more to do
if (s[index - 1] == '.') break; // if char is 0 but its the last 0 before decimal point, stop
len--;
index--;
}*/
return len;
}
/* format positive or negative doubles */
int doubleToString(char *s, double f, int pre) {
// handle negatives
int len = 0;
if (f < 0) {
*s = '-';
len++;
f *= -1; // change to positive
}
len += posDoubleToString(&s[len], f, pre);
return len;
}
/*
Write vertices to given file, use our doubleToString for formatting
python-interface: ObjWrite.writeFast(arr: ndarray, filepath: string)
*/
static PyObject* methodWriteIterFast(PyObject *self, PyObject *args) {
// Parse arguments
PyArrayObject *arr;
char *filepath = NULL;
if (!PyArg_ParseTuple(args, "O!s", &PyArray_Type, &arr, &filepath)) return PyLong_FromLong(-1);
npy_intp size = PyArray_SIZE(arr);
// Handle zero-sized arrays specially, if size is not a multiple of 3, exit
if (size == 0 || size % 3 != 0) return PyLong_FromLong(-1);
// get iterator
NpyIter* iter;
NpyIter_IterNextFunc *iternext;
PyArray_Descr *dtype;
dtype = PyArray_DescrFromType(NPY_DOUBLE);
iter = NpyIter_New(arr, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_NO_CASTING, dtype);
if (iter == NULL) return PyLong_FromLong(-1);
// get iternext function for fast access
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
NpyIter_Deallocate(iter);
return PyLong_FromLong(-1);
}
// get data pointer, this will get updated by the iterator
double **dataptr;
dataptr = (double **) NpyIter_GetDataPtrArray(iter);
// open file, exit if null
FILE *fp = fopen(filepath, "w");
if (fp == NULL) {
NpyIter_Deallocate(iter);
return PyLong_FromLong(-1);
}
// init file buffer, writing in chunks does not seem to offer any significant benefit
// but it should still will be useful when disk utilization is high
char fileBuffer[CHUNK_SIZE + 128];
int bufferCount = 0;
double x, y, z;
do {
// get 3 doubles from array
x = **dataptr;
iternext(iter);
y = **dataptr;
iternext(iter);
z = **dataptr;
// use doubleToString to format and write to buffer
fileBuffer[bufferCount++] = 'v';
fileBuffer[bufferCount++] = ' ';
bufferCount += doubleToString(&fileBuffer[bufferCount], x, 17);
fileBuffer[bufferCount++] = ' ';
bufferCount += doubleToString(&fileBuffer[bufferCount], y, 17);
fileBuffer[bufferCount++] = ' ';
bufferCount += doubleToString(&fileBuffer[bufferCount], z, 17);
fileBuffer[bufferCount++] = '\n';
// if the chunk is big enough, write it.
if (bufferCount >= CHUNK_SIZE) {
fwrite(fileBuffer, bufferCount, 1, fp);
bufferCount = 0;
}
} while (iternext(iter));
// write remainder
if (bufferCount > 0) fwrite(fileBuffer, 1, bufferCount, fp);
// clean-up and exit with success
NpyIter_Deallocate(iter);
fclose(fp);
return PyLong_FromLong(0);
}
/* Set up the methods table */
static PyMethodDef objWriteMethods[] = {
{"write", methodWriteIter, METH_VARARGS, "write numpy array to a text file in .obj format"},
{"writeFast", methodWriteIterFast, METH_VARARGS, "write numpy array to a text file in .obj format"},
{NULL, NULL, 0, NULL} /* Sentinel - marks the end of this structure */
};
/* Set up module definition */
static struct PyModuleDef objWriteModule = {
PyModuleDef_HEAD_INIT,
"ObjWrite",
"write numpy array to a text file in .obj format",
-1,
objWriteMethods
};
/* module init function */
PyMODINIT_FUNC PyInit_ObjWrite(void) {
import_array();
return PyModule_Create(&objWriteModule);
}
setup.py
from distutils.core import setup, Extension
import numpy
def main():
setup(
name="ObjWrite",
version="1.0.0",
description="Python interface for the function to write numpy array to a file",
author="Shobhit Vashistha",
author_email="shobhit.v87@gmail.com",
ext_modules=[
Extension("ObjWrite", ["ObjWrite.c"], include_dirs=[numpy.get_include()])
]
)
if __name__ == "__main__":
main()
I am aware that this is probably overkill, but I had great fun diving into C and Python/Numpy C-Extension world, and hopefully someone else will find this useful in the future.