This extension is used to search data in a linked list and return it as a nested tuple of lists.[(ele1,ele2,),...]
However when the program runs for a period of time, segment fault will occur. I know that there might be something I'm missing with python memory management, but it's not clear to me exactly where the problem is.
header file:
#include "Python.h"
#define PY_EXTENSION_H
#ifndef TESTFORSCORE_MAIN_H
#define TESTFORSCORE_MAIN_H
#define MAX(a,b) a<b?b:a
#define MIN(a,b) a<b?a:b
typedef unsigned char byte;
typedef struct{
char* content;
int content_len;
int features_len;
byte* features;
long id;
}NODE;
typedef struct LISTNODE{
LISTNODE* next;
NODE* node;
}LISTNODE;
extern "C"{
static PyObject *compare(byte* features1,byte* features2,int min);
static PyObject *init_map(byte* features,int features_len,char* content ,int content_len,long id);
static PyObject *get_list_size(PyObject *self, PyObject *args);
static PyObject *search(PyObject *self, PyObject *args);
}
#endif
cpp
#define PY_SSIZE_T_CLEAN
#include "example.h"
static LISTNODE *head_of_map=(LISTNODE*) PyMem_RawMalloc(sizeof(LISTNODE));
LISTNODE *current_head=head_of_map;
long COUNTER=0;
static PyObject *get_list_size(PyObject *self, PyObject *args)
{
return Py_BuildValue("l", COUNTER);
}
static PyObject *init_map(PyObject *self, PyObject *args){//
PyObject *obj1,*obj2;
int len1,len2;
long activity;
byte *features,*tmp_b;
char *content,*tmp_c;
if (!PyArg_ParseTuple(args,"SSiiL", &obj1,&obj2,&len1,&len2,&activity)) {
return NULL;
}
features=(byte *) PyBytes_AsString(obj1);
content=PyBytes_AsString(obj2);
LISTNODE *list_node=(LISTNODE*) PyMem_RawMalloc(sizeof(LISTNODE));
NODE *node=(NODE*) PyMem_RawMalloc(sizeof(NODE));
tmp_b=(byte *) PyMem_RawMalloc(sizeof(byte)*len1);
tmp_c=(char *) PyMem_RawMalloc(sizeof(char)*len2);
memcpy(tmp_b,features,len1);
memcpy(tmp_c,content,len2);
node->features=tmp_b;
node->content=tmp_c;
node->features_len=len1;
node->content_len=len2;
node->id=activity;
list_node->node=node;
current_head->next=list_node;
current_head=current_head->next;
COUNTER+=1;
// err maybe here
//Py_DecRef(obj1);
//Py_DecRef(obj2);
PyMem_FREE(tmp_b);
PyMem_Free(tmp_c);
return Py_BuildValue("b",0);
}
static double _compare(byte* features1,byte* features2,int min){
//do something ...
return 0.3;
}
static PyObject *search(PyObject *self, PyObject *args){//(byte* features,char *content,int features_len,int content_len,double threshold,int _idx){
PyObject *obj1,*obj2,*tuple,*content,*matched_content,*score1,*_id,*matched_id, *lis= PyList_New(0);
byte* features;
char *c_content,*tmp_content,*tmp_matched;
int content_len,length ,idx;
double threshold,score;
LISTNODE *p = head_of_map;
if (!PyArg_ParseTuple(args,"SSiidi", &obj1,&obj2,&length,&content_len,&threshold,&idx)) {
return NULL;
}
features = (unsigned char*)PyBytes_AsString(obj1);
c_content = PyBytes_AsString(obj2);
// Py_DecRef(obj1);
// Py_DecRef(obj2);
p=p->next;
int counter=0;
while (p!=NULL){
counter+=1;
// compare here
score =0.3;
if(score>threshold){
//if(counter>1000) break;
tuple = PyTuple_New(5);
score1 = Py_BuildValue("d",score);
_id = Py_BuildValue("b",idx);
matched_id = Py_BuildValue("l",p->node->id);
tmp_content = (char*)PyMem_Malloc(sizeof(char)*content_len);
if (tmp_content == NULL)
return PyErr_NoMemory();
memcpy(tmp_content,c_content,content_len);
content = Py_BuildValue("y#",tmp_content,(Py_ssize_t) sizeof(char)*content_len);
tmp_matched=(char*) PyMem_Malloc(sizeof(char)*p->node->content_len);
if (tmp_matched == NULL)
return PyErr_NoMemory();
memcpy(tmp_matched,p->node->content, p->node->content_len);
matched_content = Py_BuildValue("y#",tmp_matched,(Py_ssize_t) sizeof(char)*p->node->content_len);
PyTuple_SetItem(tuple,0,_id);
PyTuple_SetItem(tuple,1,matched_id);
PyTuple_SetItem(tuple,2,score1);
PyTuple_SetItem(tuple,3,content);
PyTuple_SetItem(tuple,4,matched_content);
PyList_Append(lis,tuple);
Py_DecRef(tuple);
PyMem_Free(tmp_content);
PyMem_Free(tmp_matched);
}
p=p->next;
}
if (PyErr_Occurred()){
Py_DECREF(lis);
return NULL ;
}
return lis;
}
static PyMethodDef exampleMethods[] = {
{"get_list_size", get_list_size, METH_VARARGS, "example"},
{"compare",compare,METH_VARARGS,"example"},
{"init_map",init_map,METH_VARARGS,"example"},
{"destory",destory,METH_VARARGS,""},
{"search",search,METH_VARARGS,"example"},
{ NULL, NULL, 0, NULL}
};
static struct PyModuleDef ptexamplemodule = {
PyModuleDef_HEAD_INIT,
"example", /* name of module */
"A module that imports an API", /* Doc string (may be NULL) */
-1, /* Size of per-interpreter state or -1 */
exampleMethods /* Method table */
};
/* Module initialization function */
PyMODINIT_FUNC
PyInit_example(void) {
PyObject *m;
m = PyModule_Create(&ptexamplemodule);
if (m == NULL)
return NULL;
/* Import sample, loading its API functions */
return m;
}
int main() {
return 0;
}
setup file
from distutils.core import setup, Extension
setup(name='example',
ext_modules=[
Extension('example',
['example.cpp'],
include_dirs = [], # May need pysample.h directory
)
],
version="0.0.1"
)
test file
import example
import random
import time
import faulthandler
faulthandler.enable()
lis=[(b'\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2', b'\x856\x96\xda\xe4H_\xf9\xb3\x95\xff\xda\xc9\x05\x17\xd5'), (b'!\xdf\xc1\xc20h\xc3#\x8f\x8a\xd3\x92\xc9}\x1b\x08', b'\xa3\x03m\xd3\x96\x01\xb2\xe0\xfcOg\x87\xfa\xadA\x89'), (b'Fu\xe6\x97=\xb9\x88S\xe4Q\xc06\x9f[\x84J', b'`9\xf2- \x89\xc63?\xe3=\xf9o\x9b\xe4\x8e'), (b'\x95b\x1f\xba\x90\xfa\xe9j#\xd4\x12x\xc3\x93#\x1c', b'8f\xc4\xeb\xef\x95\xe8\x81\xec\xed1\x00j\xc2\xd2j'), (b'\xf9\x8bUk"\xa2y\xfbHI\x08\xf6\x03\xd3Ye', b'\xdf\xdb\xb6\x87\xf3\x05-p\x80%8\x8cd[43'), (b'\xee\x92\xcf\xb5\xd7\x05j\x92\xa2\x1c\xf7a.[\x05\x86', b'Y\x00|\x02\xa1\xb1wI\x08\xb3\xfc\xfc\xc7\xf5db'), (b"\x9d\xe0&\\~\xbf\xaf`\xae\xc2?'B\xfa\x95\x86", b'FL\xe3\xbb\x18\xeef\x08%\xe1\xc8,\xe3J=\xd5'), (b'\xe1\xfb\xc9n\x89\x1d\x9a\x9d\xe9\x1a\x9c\xde\xcd\xce\xbd\xcf', b'\xa7\xe0\x9b\x8bl\x88\x85,b\x04`3\xe6\x03\x85\x8e'), (b'\xc5\xbf\xd4\xa7\xe7e\xe5\x8a\xe0\xbc\xa8\xb8Yw\x0c\xd3', b":\xc2a\xb6\xd3\x1ct'z\xfe\xf9\xa7P\x8c\x1c\x7f"), (b'\xfd\xb2\x9aP\xc9\x0c\xcf\xe2\x8c\x82g\x8c{r\x94\x84', b'3B\xca\x8c\xd8\x9a\xb8\x94iyT\xd6\x05N_\x8a'), (b'\xea\x7f\x83\xaa\\\x10y\xd1h\x93l\xd9\xfe\x87\xe8j', b'jj\x8f\x1a-tq\xd9\xf1\xf8\xf7\xa5\xf6\x86\xb9\xdb'), (b'\xa4\x0b\x0f\xf1\x84\x95\x1eK\xddtH\xf0IaY:', b'p\xb5\xd9"\xb7\x0f(\r\xb1\x9bm\xc0hq\x00e'), (b'ga\x8f\xbbo\xff\xedQ\x9aM\xbe\x9cTO\xd35', b'=\xc3\x9d\x8d\xd0\xae8*1\x1d\xbf\x84\x89~76'), (b'\xaf\xe1%\xc6\xedo\xec24\xc0\x9d\xa8\x9fy=N', b'\x03m\x81\xd7B\x04\x1a*`C\xee\xccb\x1fm\xf0'), (b'\x1f\x10\xf6\x9fa\xc5\xbf\xaf\x18\xb1Mw\xba\x92&\xe1', b'\xb7\x90/\r$\t\xce}xk\xe7\xbeL\xe9\x8e\xc6'), (b'\xc8.\xd9)\x08g/\xc6\x0e\xed\xbb\xb9\xab\xad\xc8\xa1', b'\xa1c\xcc\xe9\xaasJ\x98\x1d\xd24\xc0\xf2+\x9d\xac'), (b'\xa1\x91\xa0:D\x11(\xb3\xbc0\x13\xd9\xdeD\xb9\x86', b'\x0fV\xa1\x8fl\x0c\xf4\x94=\xe7\xd0*\xdb\xcb\xa5;'), (b'bx;\x8dA\x83\xb8lca\x9c\xadV\xc2\xba/', b'\xe9\xe0\xe7({\x11\r*\xca\xf1\x08\x9bG\x830\x82'), (b"\xde\xbb\xd9\xce{%MP'Xj\xed8>\xa3a", b'\\5M\x13\x13\xb1YY\xa53y\x99$a\x91\xa5'), (b'\xf1\xda\xc4\xfe\xc6\x83\xe6\xf3\xc6\x88\x86H\xc4\x15\x12\xb4', b"\xf8'^\xe5\xb8`\x0fl\x06$\x1cz:\x93\xa1\\"), (b'\xc7@\x03Ny\x11\x96\xcb\xfe\x19\x0e\x18\xbf\xd59I', b'P\xf0\x0f\xec\xaf\x1bS\xa8\x8dt\x11O\xe8\x0f|2'), (b'\xb4\xda\xff\x8c0\x18/\xf1\xad\xa4F\x94\xb1\xad\xf4\xf1', b'.\x83(\x8f#\xe1\x11\x96\xdeS\x17\x8b\xe3]\xb5\xca'), (b'\x1d\t\x11\xc1\x15\xdc\x8e\xea\xe1\xd2\xba%J\\\xf2\xfd', b'\x070ht\xb4\xdeQ+\xc5\xb8#\xac\xd9z\xc0A'), (b' V\xe3\xc5gEg\xc5\xd5\xe1\xd6\xd7\xab\xd0\xb6n', b'\x88\x9b8\xf6\xe26\x92z\x91\xe4\x92\x1e\xbc\xc5;u'), (b'\x88}\xd7*QFn:\xd7\x12RA\xe6-\xd2)', b'\xeb\x04<\xe6\xfa)u\x9ds\xecj\xa7\x84\xcf\xa7\xa7'), (b"\xec\x04.bP)'\xb6\x08\x05\x92\x8c\x85\x1a\x84\x8f", b'@\x80\x08A\xba\x9c2Sd\xdf\x97\x07\xc4Im\xef'), (b'\x11(\x9ez\x0c\xd7\xca+7\xcc\xb0\x19,:\xcb5', b'\xf9V\x8cm\xe5-\xf9\xf1\xf6g\x86~\x8f\xd0<\x84'), (b'\xbb\xe9I=\xc74\xc9C\x0cT\x81\x9d\x85\xdeWF', b'\xfe\x1b\xb1gi\x94O\x98\xf0\x90cJ\xdb\xbf\xc7\x1e'), (b'2`\x9d\x9c\x02\xe4\xce\xf0\x14\xc0Z\xaaW\xf2\xa5\xff', b'\xe6\x05?\xe3X"\xb2O)~\x96\x8f^\x9c\x077'), (b'\xaf\x87,a\xa9\xcd\n|O\x1e\xa7\xdd\xba\x89;\x9d', b'\x98b\xac?\x13*\xf2m\xfeo=v\x96\xd5\x14\x8d'), (b'\xee\xcb{\xeeRQ\x82}\x05|\xc6S\x82\x85\xa4\xff', b'C\x93\xdes\x00\xe8cY<\xde1.\x19\xd8\xe5R'), (b'\xack\x036\x9b4\xca\xa4\x15\xbd\xd5\xbe\xd3\xd4\xfb{', b'm?\xc44>\xf4qig\x10\x8c\xeb\x11o\x99\xb4'), (b'D!\x0e*\xb3/<\x15F_\xd6;j\x85\xbe,', b'Y\xb01H\xb6:\x04_]OL\x9d\x8e\xbcp\xdc'), (b'\xe6\x9d\x02L[\x1c\xee\x8b0lu\xdd\x00\x9a\xfa!', b'J&\tkf\xa8\xeb\x1b\x90\x81\xce\\\x1b\xe8\xb6\x1d'), (b'\xcdnd\xf6\xf6\x11\x9e\x06\x10\xce\xcfP\x89\xed\xad\x02', b'I:\xe0{8H\xfcw\xb1\xe9v\x11p-/0'), (b'\xa22?\xcd\xfbU+\xbf- \xe3<\xc2\x94\xef\xb6', b'\xc7\x89\xc4K?{\x18\x1ftd\x0f\x19L}\xba\xf9'), (b'8\xf9\xe9\xd0\xa0\x97\x8e\rp\xe9\xd6[\x8e`\x93\x81', b'\xf7\x16"\xaa\x86\\b\x9d\x0c\x17"\x81\xeea\x03\x19'), (b')\xfb\x0c\xc7-D\xd8\xa4\xf7\x8d%\x02\x06\xa1\xbf\x94', b'\xc8s!s\xe2n\xee\xb0\x1eC\x15\xc0\xb4d\x08\xa7'), (b'\x7f\xa6\xdc\xf4\x9e\xe3\xda\xd3\x8a\xcb\xfc\x98,1\\\xe5', b'\x82,\xfbKL_\xe9\xc9L`\xc7"&3#\xb5'), (b'&\x98,\x88\xff\x0e\xca[\xcb\xc6t;\xbc5Y"', b'\x1b\x8bA\xd1\x8a\x9e\xe6\xce\xc4#C\x89\xe9Uon'), (b'\x1c\x1b\xac2\x86\xd6\x9a\xf9\xdd\x8d=\xa3\xff\x9a\xe1\xa5', b'\xed\x10\xed\xc1\xfe\xec\xd3\x0b\x8b\xe9\xd4?H"\xd6\x8f'), (b'9\x8f3\x8b\x02\xb7\xe7\xe5\xa1TO\x1a\xb6N\x0b\xb6', b'\xb4\x0f\xa7 \xc7\x83\x81\x18\x9cU\xfd2\x93\xb8(\xe9'), (b'\x8d\xbe\xcf`\xc3d\xde\xca\xf4\n.\x92Mrw\xa6', b';\t\xfb\x19eKt0\x8foG\xcd\xd5\xe0\xf6\x9d'), (b'd\xbcG)\xec\xc8\x04 \xdd\xd9\xf6\xd6\x91/\xb2\xd3', b'2\xba\xa8\x8a\xc5\xd6h\xab\x9c\x9e\x8b\x1b5\xcc_.'), (b'\x98\xea\xf0\xcbJ-c\xea\x81\xcf?B\x17\xcb\x99\xf9', b'\x82\x05\x14\xf2\x05q\x97\xf7Z-B\xad\xa2\x16\xa0`'), (b'\x1c\x94A\xfd\xe1\xf2\x06\x8b\x19\x80\xfa\x01g\xc3V\xaf', b'\x1c\xde\xec\xdf&\x93u\xab\xf9\x12\xfb\x04\xbc0\x99d'), (b'6\xfa\x993b\t\x12E\xd2t\xde\xb6\xfa\x98Im', b',\xa2\xf7VB\x81\x048\xc8\xbf3\xce\x8d\xecn\x82'), (b'L\xd6\x93\xbe\x02\xbeT\xa1\x8d\x88\x84\x18\xc7\xcf\x9fE', b'\x16i"\xb7\x81\xef7\x7f\xdb\x7fY\xbft\x1b\xac\xd0'), (b'\x90\xd6\xe7>Z\x8b\x83\x14\xab\xefH\xa1\x95\xe6\x8e\xc3', b'\xebJ\xfcXd\xd75I@\x12\xe9\x88\xdcG\x15\xb7'), (b'\xb8\xd6\xd03j\xed\xf9\xa0\xb1L3\x8d\x89P\xb4\xe5', b'\xe3\xdf)CR\x03s\x19\x8f\xb5\xc6\xf5\xee&W\xee'), (b'cx\xd7dQ\xc3)YR\x9c7\xf0\xb4\xe4P\xfd', b'h\xa8\x122\xceV\x8c\xf4\x12\x14J\x08\x91\xcf\x93\xfa'), (b'\x1d\x14\x86\xccTi\xdd\xc3L!\x97\x92\xb3\xb7X$', b't\x84\xa4\xe72\xa5X\xe9Ta\x03\x0c\xb00\x07\x83'), (b'\xa6\xdd\x1f\xfc\xca2\x88\x99\xbeA\x9a\xa6JVJ\x84', b'B\x81\x8e-K\xde\x15\xfd\t"\x08\x04@/-\xc9'), (b']b\x18\xe65C\x17*\x103J?\xe79!\x9b', b'\n\xbb\x1f\x98\nZZ\x97\xe0\x12\xf6W\xc4\x1e_~'), (b'\x9a\x9f-c\t\x14\xa4NL\xc5\xc0\x99R\x15\x94\xca', b'\xa0\xf9\x11\xfa\xd7\xf3\xb5\x11\xb2SK\xe8\xdb\x06\x94\x1f'), (b'!\xf0\x90\xd8t_O\xc1\xb5\xfe}\xca\xa6\x15}\x19', b'\xc9+\x89a9y3\xe0\xffh\xedmMq\x13\xcc'), (b'`\xf1\xab\x07A\xe9\xe5\x805\xf2\xabD8\xce\x85A', b'\x9d\x0f\x91\xc2\x01\x93\x8fm\xd6\x03XZl\t\x15J'), (b')\r\xcf\xc0\xa1\xa9\xf7i\x94\xa9\x8d\xed\xea\xcd\xb0B', b'jq\xe9\x01*H\x14\xa7M\x8c\xd2\xa5\x19\\\x80\x12'), (b'vJ\x98\xbf\xd5D\x15\xdb\xa1\xd7\xdc\xaf\xa0\xaa\xf7\xc1', b'\x06\xd8-\x05\xfe\x172W\xa4\xab\xe4\x97_h\xe6\xac'), (b"\xc6'P\xd2\xfcq\xcb\n\xe2\xc1sN>\xbc\x9a\x08", b'\xacysx4z\x96\x8f\xf1\xa5M\x9e\xe7f\\\x16'), (b'\x00\xc6\x81\xf0\xf1\xcb\xa8_\x1fF\x18\xe9\xb0\x95\xa3\xab', b'N\xd8\xdaz\xe8F4\xb1\x88>\xec\xd6[\x15\xd7\xfd'), (b'E\x1ee\x8f\xd5\x0f\x19\xa8\xb6~-\xe0n\xe7\x89\xe1', b'\x83\x90\xa0\x93\xec\x086+\xcf\x08\x9c\x048\x85\x88^'), (b"\x82\x13\xfa\x9f 6\xd3'\xd7\xd2\xc1\xf9\x03.\x13*", b'J\xe2\xf8\x1e\xec\xeb\xd9"q\xbb:\x04\xc2\xdf<\xa8'), (b'\x01\xdf\xfcPW\xc80&{\xfe\x99\xf8\x10\xc6R\xce', b'=Cnh3\x9drKE\x1aU\x8e\xcc\x84jS'), (b'<\xeeo[J\xb0r0N\x07\xf4\x1e\xfb\x16\xac\x07', b'\xf9KP\xfe\xd2\xed\xc7`\xc7\x0c\x9et\xd3^\xc9g'), (b'P\xa1b#\x07V\xb0s\x86S\x9a\x1d[\xd9\xe7\x92', b'r&\xb1a\x0ff2\xd2\x8ft\x13\xff\xba\x0fy\xf4'), (b'\xc8t\xcc\x96V\x8bS\xde\x98\xf1\xa33\xa9\xdb\x1b\x85', b'13\xf1&\x8c\xfe3\xf9\xca\\\xc6\xdb\x1dT\x04b'), (b'\xb9u\xf2O\xfb\x05\xb3\xc2r*\x17d\\\x8f9M', b'\xeeQ\xf9&\xc5\xc8\xc8\xe6B\x1d\xcf\x184\x19]b'), (b'\x96\xe6\xc9\x1e;\x83e4\xb0\xbe\xf6\x94\x03\x06\x85:', b'\xa2\xb0\nS\xaf\xads.\x94\x16s\x901\xaf-\xe4'), (b'&\x81t\x048(\xd7 \xb6\x1aR\xaa\x98\xdbN\x0b', b'\x9cd\x8a#\xdd \xdcS\x14\x1e\x1b\xea\x87\x1b\xa41'), (b"\x1fC\x0cr,\x1d\xab\xa8\xd9\xfc\xa1\xa2\xbb\xcdw'", b'D\xb6)\x7f\xd7\xe6\xb2@\x18D\x1d\x08c\xce2k'), (b'$\xc1\xabD\r\xe0a\x0co\x12\x17\xbdB\x06\xc9y', b's\xd8\xf8\x05\x81\xfeY\xc4\xc8|\xe5\xe2]U\xfa"'), (b'\x8f\xe3\xd6\xc9d\xc4\xf0\xd0\xe3\x17\x12\x82\xb2\xfd\xe9\xef', b'\xf1\r/9\x97/\xd4\xf6dz\x89\x7fEG/\x15'), (b'\x16\xf2\xd8\x9a\x029\x9b{\x02\xf4u\x08~\x80\xb9Z', b'\xe1+:9\xaa\x87:\xf0\x02\xdd\xe5}G\x0ch\x98'), (b'\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO', b'\x13\xd0\x1c[\xe6\x8aa\xd6\xa0a\xb1X\x84\xb8\xde2'), (b'\xa8\xaa\xa8\xe7\xcal>\xe6\xdb\x18vL\xdak%}', b"\xe5W'\x83\xbd\xf2\x01\x9b\x89\x98l\xfd\xcf\x02\x84\x13"), (b'6\x9b\xae\x07\xe0+\xa0\xcd\xdbT\t\x02@\x81$E', b'\x10Q\x1an\xaa\xe7\x9cy\x04t\x16\x1d\xea\x9d\xd2\xe6'), (b'ne\x83\xb0\xe2\x8aG\xda\xaf%\x93cg\x84\xaf\xea', b'p!}\x8aQ(C\x08\xaf\x8bi\x80\xdaw\x7f\x81'), (b'<\xa6\x90\xb2\x03\x8f\xa4\x17$\xc7\xbe\x8a\x1e!\x01s', b'\x19\x7ftaaLUB\x1aBt\xf5U~\xeeY'), (b'\xe4|z\xaer8\xa0\xa3\xc8\x9c\x8d\x03`\xb8rN', b'b\xda\xf8e\n\x1f\xc1\xb2\x97\x13\xd7\x91\x91\x86\xbe\x13'), (b'\x00\x84\xc1\xd8\xe0ub\x1a\xc0\xa1\xfe\x08K\xfb>,', b'g\xb06$\xbb{s\xb9-\x8e5h|+\xf8\xb1'), (b'(\xac{\xb1^OOy\xc98\xf5\xc5\xc2\xd3\xb3\x1e', b'\x8d\xfc\x0f\xda\xd5\xa6\x89)\x92\x173\x1e)*\xf4&'), (b'G*.\xe2\xd8\xee\x87\xfa\x83~eS\xcc\x1e\xdeQ', b'\xc9y \x1a]Y\x8d\xf8\xa0\xd0^p\xc1\xf1\xa4\xd2'), (b'\x16\x91\xd7[\xbeC\x80T\x08\x0f\xbd\xe1\xf7\xbd\x1b\xb8', b'\xcc\x8c\xfb\x1f"\x16\xa7/\xe2inK\xaa\xdf\xc0\xe4'), (b'\x1d\xbc\xf2\x9b\xe90\x13\xc9\tuuk\r\xe4\xa9\xfa', b'\xf6\x97\xf2B\xaa\x83\xf9k\x15K\xa0\xb9\xdf\xf4,\x02'), (b'hIA(;\x8b\x92G!K\xca\x1a\xfd\x8c9\x95', b'0!V\xfc\xfb\xa1,R\xc6\xdaO\x9c\x16\xa8<\xe2'), (b'?\xd8\xd7e\x88\xad\xcd\xd6\x8d\\\x1e\xc2l$;\xd8', b"\x88\x8e\x86'#f\xcf\xe3,\x96&\r\xb1\xee@g"), (b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0", b'\x99\xbf\xf9qo\xde\x92xge\x04\x84\xb8 \xfb\xe3'), (b'\xa7\x8d\x83\xc8\xa3\xc15\xdc`\x82\xed\xae.\xea\xf2\xea', b'[\xe8{\r\xd6g\xcf\xe8L\x16\xde\xcd\x90M\xbd\x9e'), (b'\x1a\x1b;\xa62\x10i\xb2\x19n\xc1\xddKz\xb4\xd8', b';On\xd2N\xef%B\x05_T9)IG\xcf'), (b'@g`\x1e\xc1\xdf\x14A\xc1\x04N\xb6]\x97\xd3\x17', b'\xbc6\xcb\xaah*\xf1\xd0_\xd9B\x94F\xa8\xd8\r'), (b'<\x86\xa4z\xb4\xaeS\x90\xa9\xb5I\xe8\xe6FI\x8c', b'\r2\xd2.H"\xfb\x9d}{\x11^\xdf\xcb\x186'), (b'\xf0\xb8J|\x0b\xfe\xcd\x8b\xc0$&"8\xffob', b'\xa8}\xf31\xdd\x07\xc6\xa0\xbb\x9f0\xca\xe3&^\xf6'), (b'r\x9b\x81\xa0D\x94\x80\xfdu\xb1O\xbc\xf6\x1a\xd8\xe0', b'\xd4)\xb6\xdbQ\xa5#\x16\xed9\xadM\xf82%\xeb'), (b'\xb9V\xb7R\xb9\xcdJ9c\x1a`V\xb9\xb0oX', b'\x9a\xff]\xb3\x83\x96\xf5\r\x98\xe2\xc5\x86\xa21k%'), (b"J\xe9\xaa\x8f\x07\xab\x90\x18'\xa6\x85\x08\x18\xa8\x96?", b'\xa7\x83\xed\xd9\t\xd8\x85\xe4\xf8\xcb0\x95n\xf2`\xa8'), (b'\xc3\xaf\xc8m\xfc\x90}=\xb8\x1fr\xab}\r\xc1!', b'\x04\xa0sq\xf7\xd3Dm\xb9\xb8\x9c\xbap(\xa1L'), (b'\x97bq\x0f\x93\xb5\xa1\xa1\xf58\xdcu5\x9a\xbd\x8b', b"\x07'\xc0\xdb/0\xd9H\x81\x06\xe2^ `F\xa9"), (b'Nn\xd4\xbd\x13dN\x9e\xcd\xd4\x06kk~\xf5\x7f', b"\x98j'}\xc5\xb0\x1d\x19\xe2/\x92'\x9e\xd4\xba("), (b'\x08ty\x84\xde>\x0f5jL6\x05\xe1\x02pH', b'0\xb9\xd5\xd40u\x04\xfdK\xe2D\xbc\xddy\xd4\x11')]
def cost(func):
def wrapper(*args,**kwargs):
start=time.time()
res=func(*args,**kwargs)
end=time.time()
spend=end-start
print(f"cost:{spend}")
return res
return wrapper
@cost
def test(repeat):
tmp=[*lis*repeat]
if example.get_list_size()<4000000:
for it in tmp:
t=b'something here'
example.init_map(it[0],t ,16,len(t),int(random.random()*10000))
print(f"pool size is {example.get_list_size()}")
print(f"get args from lis start")
print(lis)
arg1=lis[0][0]
print(f"get args from lis complete")
t=b'something need search'
print(f"start search")
res=example.search(arg1,t,16,len(t),0.1,int(random.random()*10000))
l=len(gc.get_objects())
print(f"res is {len(res)},gc:{l}")
if __name__=='__main__':
for i in range(10000):
test(10)
error
pool size is 1000
get args from lis start
Fatal Python error: Segmentation fault
Current thread 0x00007f0ca3c90340 (most recent call first):
File "/tmp/tmp.ztKz0S1yKb/test.py", line 29 in test
File "/tmp/tmp.ztKz0S1yKb/test.py", line 12 in wrapper
File "/tmp/tmp.ztKz0S1yKb/test.py", line 41 in <module>
Segmentation fault (core dumped)
I tried using PyMem_RawMalloc and Py_DecRef but still segfault. what should i do?
Sorry for the little misinformation I gave. I may have located where the problem is: problem here
Everything works when I comment out Py_DecRef ,otherwise it throws a seg fault exception:
I don't understand why Py_DecRef can't be used here.
There are some things wrong:
C and Python code have errors, so it doesn't compile / run - not an MCVE ([SO]: How to create a Minimal, Reproducible Example (reprex (mcve)))
There is a mix of raw (malloc) and pymalloc memory functions
Mismatches between types and format specifiers
C API misunderstanding. According to [Python.Docs]: Parsing arguments and building values (emphasis is mine):
Note that any Python object references which are provided to the caller are borrowed references; do not decrement their reference count!
Design flaw mixing C and Python types. There is the C list that is supposed to be the backbone, but it's tightly coupled with Python structures. Also attempting to export the functions from the .dll makes me think that things are not very clear for the OP. I'd suggest to keep the core functionality at the C level and provide wrappers for interacting with Python. Otherwise, keep switching from one to another would lose the speed advantage (which I assume is the reason for not writing everything in Python)
The question is tagged C++, but there's nothing C++'y about the code, it's rather C (in C++ there is std::list, smart pointers and other features that could save writing a lot of code)
Minor ones:
LISTNODE - NODE nesting. Although it looks better structured, the overhead for working with nested structures might be a greater loss
Passing the size of bytes (which is held inside it)
Method arguments specifications
Code style
Many many others
Before going further, I suggest a thorough reading of the following (including referenced URLs):
Now, there are multiple problems, each with multiple solutions.
Starting from your code, I prepared an example. It's written in C (lots of code which doesn't have anything to do with the goal).
All the business logic is carried out in C (it's callable from a C application), and the Python wrapping layer is on top of that.
dll00.h:
#include <stdint.h>
#if defined(_WIN32) && !defined(PY_MOD_BUILD)
# if defined(DLL00_STATIC)
# define DLL00_EXPORT_API
# else
# if defined DLL00_EXPORTS
# define DLL00_EXPORT_API __declspec(dllexport)
# else
# define DLL00_EXPORT_API __declspec(dllimport)
# endif
# endif
#else
# define DLL00_EXPORT_API
#endif
typedef uint8_t byte;
typedef unsigned long ulong;
typedef struct Node_ {
byte *features;
ulong features_len;
char *content;
ulong content_len;
long id;
struct Node_ *next;
} Node, *PNode;
typedef struct {
size_t len;
PNode *nodes;
double *scores;
} FoundData, *PFoundData;
#if defined(__cplusplus)
extern "C" {
#endif
DLL00_EXPORT_API int addNode(const byte *features, ulong features_len, const char *content, ulong content_len, long id);
DLL00_EXPORT_API size_t size();
DLL00_EXPORT_API PFoundData search(const byte *features, ulong features_len, const char *content, ulong content_len, long id, double threshold);
DLL00_EXPORT_API void freeFoundData(PFoundData data, int membersOnly);
DLL00_EXPORT_API size_t cleanup();
#if defined(__cplusplus)
}
#endif
dll00.c:
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define DLL00_EXPORTS
#include "dll00.h"
static Node *gHead = NULL;
static Node *gCur = NULL;
static size_t gLen = 0;
int addNode(const byte *features, ulong features_len, const char *content, ulong content_len, long id)
{
Node *node = malloc(sizeof(Node));
if (node == NULL) {
return -1;
}
node->features = malloc(sizeof(byte) * features_len);
if (node->features == NULL) {
free(node);
return -2;
}
node->content = malloc(sizeof(char) * content_len);
if (node->content == NULL) {
free(node->features);
free(node);
return -3;
}
memcpy(node->features, features, features_len);
node->features_len = features_len;
memcpy(node->content, content, content_len);
node->content_len = content_len;
node->id = id;
node->next = NULL;
if (gCur == NULL) {
gCur = node;
gHead = node;
} else {
gCur->next = node;
gCur = node;
}
++gLen;
return 0;
}
size_t size()
{
return gLen;
}
static double compare(const byte *features0, ulong features0_len, const byte *features1, ulong features1_len, int min)
{
// @TODO - cfati: Dumb comparison
if ((features0_len > 1) && (features1_len > 1) && ((features0[0] == features1[0]) || (features0[0] == features1[1]) || (features0[1] == features1[0]) || (features0[1] == features1[1]))) {
return 0.3;
}
return 0;
}
PFoundData search(const byte *features, ulong features_len, const char *content, ulong content_len, long id, double threshold)
{
if (gCur == NULL) {
return NULL;
}
Node *node = gHead;
FoundData tmp = {0, NULL, NULL};
tmp.nodes = malloc(sizeof(PNode) * gLen);
if (tmp.nodes == NULL) {
perror("malloc failed 1.");
freeFoundData(&tmp, 1);
return NULL;
}
tmp.scores = malloc(sizeof(double) * gLen);
if (tmp.scores == NULL) {
perror("malloc failed 2.");
freeFoundData(&tmp, 1);
return NULL;
}
while (node != NULL) {
double score = compare(features, features_len, node->features, node->features_len, 0);
if (score > threshold) {
tmp.nodes[tmp.len] = node;
tmp.scores[tmp.len] = score;
++tmp.len;
}
node = node->next;
}
if (tmp.len == 0) {
return NULL;
} else {
PFoundData ret = calloc(1, sizeof(FoundData));
if (ret == NULL) {
perror("malloc failed 3.");
freeFoundData(&tmp, 1);
return NULL;
}
ret->nodes = malloc(sizeof(PNode) * tmp.len);
if (ret->nodes == NULL) {
perror("malloc failed 4.");
freeFoundData(ret, 0);
freeFoundData(&tmp, 1);
return NULL;
}
ret->scores = malloc(sizeof(double) * tmp.len);
if (ret->scores == NULL) {
perror("malloc failed 5.");
freeFoundData(ret, 0);
freeFoundData(&tmp, 1);
return NULL;
}
ret->len = tmp.len;
memcpy(ret->nodes, tmp.nodes, sizeof(PNode) * tmp.len);
memcpy(ret->scores, tmp.scores, sizeof(double) * tmp.len);
freeFoundData(&tmp, 1);
return ret;
}
}
void freeFoundData(PFoundData data, int membersOnly)
{
if (data == NULL)
return;
free(data->nodes);
free(data->scores);
if (!membersOnly)
free(data);
}
size_t cleanup()
{
size_t ret = 0;
Node *node = gHead;
while (node != NULL) {
free(node->features);
free(node->content);
Node *next = node->next;
free(node);
node = next;
++ret;
}
gHead = NULL;
gCur = NULL;
gLen = 0;
return ret;
}
main00.c:
#include <stdio.h>
#include <string.h>
#include "dll00.h"
void handleFoundData(PFoundData data)
{
printf("Found data (%d items):\n", data == NULL ? 0 : data->len);
if (data == NULL)
return;
for (size_t i = 0; i < data->len; ++i) {
printf(" Index: %d\n Score: %.03f\n", i, data->scores[i]);
printf(" Id: %d, Features length: %d, Content length: %d\n", data->nodes[i]->id, data->nodes[i]->features_len, data->nodes[i]->content_len);
printf(" Feats: %s\n", data->nodes[i]->features);
printf(" Cnt: %s\n", data->nodes[i]->content);
}
freeFoundData(data, 0);
}
int main()
{
Node dummies[] = {
{"1\02", 3, "567\0890", 7, 2},
{"ab\0c", 4, "d4567\0890", 9, 3},
};
printf("Element count: %zu\n", size());
for (int i = 0; i < sizeof(dummies) / sizeof(dummies[0]); ++i)
printf("Add node (%d) returned: %d\n", i, addNode(dummies[i].features, dummies[i].features_len, dummies[i].content, dummies[i].content_len, dummies[i].id));
printf("Element count: %zu\n", size());
// @TODO - cfati: Values based on .dll's dumb comparison
byte *src[] = {
"xxx", // None
"111", // 1st
"xaa", // 2nd
"1bd", // Both
};
for (int i = 0; i < sizeof(src) / sizeof(src[0]); ++i) {
printf("Search for '%s' ...\n", src[i]);
PFoundData found = search(src[i], strlen(src[i]), "", 0, 0, 0.1);
handleFoundData(found);
}
printf("Freed %zu nodes\n", cleanup());
printf("Element count: %zu\n", size());
printf("\nDone.\n\n");
return 0;
}
example.c:
#define PY_SSIZE_T_CLEAN
#include <Python.h>
//#define PY_MOD_BUILD
#include "dll00.h"
#define MOD_NAME "_example"
static PyObject* example_size(PyObject *self, PyObject *Py_UNUSED(args))
{
return PyLong_FromSize_t(size());
}
static PyObject* example_add_node(PyObject *self, PyObject *args)
{
PyObject *feat = NULL, *cnt = NULL;
long id;
if (!PyArg_ParseTuple(args, "SSl", &feat, &cnt, &id)) {
return NULL;
}
int ret = addNode((const byte*)PyBytes_AsString(feat), (ulong)PyBytes_Size(feat), PyBytes_AsString(cnt), (ulong)PyBytes_Size(cnt), id);
return PyLong_FromLong(ret);
}
static PyObject* example_search(PyObject *self, PyObject *args)
{
PyObject *feat = NULL, *cnt = NULL;
long id;
double thres;
if (!PyArg_ParseTuple(args, "SSld", &feat, &cnt, &id, &thres)) {
return NULL;
}
PFoundData tmp = search((const byte*)PyBytes_AsString(feat), PyBytes_Size(feat), PyBytes_AsString(cnt), PyBytes_Size(cnt), id, thres);
if (tmp == NULL) {
return PyList_New(0);
} else {
PyObject *ret = PyList_New(tmp->len);
if (ret == NULL) {
freeFoundData(tmp, 0);
return NULL;
}
for (size_t i = 0; i < tmp->len; ++i) {
PyObject *tup = Py_BuildValue(
"(ly#y#d)",
tmp->nodes[i]->id,
tmp->nodes[i]->content,
tmp->nodes[i]->content_len,
tmp->nodes[i]->features,
tmp->nodes[i]->features_len,
tmp->scores[i]);
// @TODO - cfati: No need to include data passed in arguments in each of the output tuples !!!
if (tup == NULL) {
Py_XDECREF(ret);
freeFoundData(tmp, 0);
return NULL;
} else {
PyList_SET_ITEM(ret, i, tup);
}
}
freeFoundData(tmp, 0);
return ret;
}
}
static PyObject* example_cleanup(PyObject *self, PyObject *Py_UNUSED(args))
{
size_t ret = 0;
//Py_BEGIN_ALLOW_THREADS
ret = cleanup();
//Py_END_ALLOW_THREADS
return PyLong_FromSize_t(ret);
}
static PyMethodDef methDef[] = {
{"size", example_size, METH_NOARGS, "Get List size"},
{"add_node", example_add_node, METH_VARARGS, "Add node"},
{"search", example_search, METH_VARARGS, "Search elements"},
{"cleanup", example_cleanup, METH_NOARGS, "Clean up"},
{NULL, NULL, 0, NULL},
};
static struct PyModuleDef modDef = {
PyModuleDef_HEAD_INIT,
MOD_NAME, // name
"A module that imports an API", // Doc string (may be NULL)
-1, // Size of per-interpreter state or -1
methDef, // Method table
};
PyMODINIT_FUNC PyInit__example() {
PyObject *m;
m = PyModule_Create(&modDef);
if (m == NULL)
return NULL;
/* Import sample, loading its API functions */
return m;
}
setup.py:
#!/usr/bin/env python
from distutils.core import setup, Extension
setup(
name="example",
ext_modules=[
Extension("_example",
["example.c", "dll00.c"],
include_dirs=(),
define_macros=[
("PY_MOD_BUILD", None),
],
),
],
version="0.0.1",
)
data.py (I just extracted input data to avoid polluting the other source file):
data = [
(b"\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2", b"\x856\x96\xda\xe4H_\xf9\xb3\x95\xff\xda\xc9\x05\x17\xd5"),
# @TODO - cfati: !!! DELETED NEXT 20 TUPLES so the answer fits the 30K characters limit !!!
(b"\xb4\xda\xff\x8c0\x18/\xf1\xad\xa4F\x94\xb1\xad\xf4\xf1", b".\x83(\x8f#\xe1\x11\x96\xdeS\x17\x8b\xe3]\xb5\xca"),
(b"\x1d\t\x11\xc1\x15\xdc\x8e\xea\xe1\xd2\xba%J\\\xf2\xfd", b"\x070ht\xb4\xdeQ+\xc5\xb8#\xac\xd9z\xc0A"),
(b" V\xe3\xc5gEg\xc5\xd5\xe1\xd6\xd7\xab\xd0\xb6n", b"\x88\x9b8\xf6\xe26\x92z\x91\xe4\x92\x1e\xbc\xc5;u"),
(b"\x88}\xd7*QFn:\xd7\x12RA\xe6-\xd2)", b"\xeb\x04<\xe6\xfa)u\x9ds\xecj\xa7\x84\xcf\xa7\xa7"),
(b"\xec\x04.bP)'\xb6\x08\x05\x92\x8c\x85\x1a\x84\x8f", b"@\x80\x08A\xba\x9c2Sd\xdf\x97\x07\xc4Im\xef"),
(b"\x11(\x9ez\x0c\xd7\xca+7\xcc\xb0\x19,:\xcb5", b"\xf9V\x8cm\xe5-\xf9\xf1\xf6g\x86~\x8f\xd0<\x84"),
(b"\xbb\xe9I=\xc74\xc9C\x0cT\x81\x9d\x85\xdeWF", b"\xfe\x1b\xb1gi\x94O\x98\xf0\x90cJ\xdb\xbf\xc7\x1e"),
(b"2`\x9d\x9c\x02\xe4\xce\xf0\x14\xc0Z\xaaW\xf2\xa5\xff", b"\xe6\x05?\xe3X\"\xb2O)~\x96\x8f^\x9c\x077"),
(b"\xaf\x87,a\xa9\xcd\n|O\x1e\xa7\xdd\xba\x89;\x9d", b"\x98b\xac?\x13*\xf2m\xfeo=v\x96\xd5\x14\x8d"),
(b"\xee\xcb{\xeeRQ\x82}\x05|\xc6S\x82\x85\xa4\xff", b"C\x93\xdes\x00\xe8cY<\xde1.\x19\xd8\xe5R"),
(b"\xack\x036\x9b4\xca\xa4\x15\xbd\xd5\xbe\xd3\xd4\xfb{", b"m?\xc44>\xf4qig\x10\x8c\xeb\x11o\x99\xb4"),
(b"D!\x0e*\xb3/<\x15F_\xd6;j\x85\xbe,", b"Y\xb01H\xb6:\x04_]OL\x9d\x8e\xbcp\xdc"),
(b"\xe6\x9d\x02L[\x1c\xee\x8b0lu\xdd\x00\x9a\xfa!", b"J&\tkf\xa8\xeb\x1b\x90\x81\xce\\\x1b\xe8\xb6\x1d"),
(b"\xcdnd\xf6\xf6\x11\x9e\x06\x10\xce\xcfP\x89\xed\xad\x02", b"I:\xe0{8H\xfcw\xb1\xe9v\x11p-/0"),
(b"\xa22?\xcd\xfbU+\xbf- \xe3<\xc2\x94\xef\xb6", b"\xc7\x89\xc4K?{\x18\x1ftd\x0f\x19L}\xba\xf9"),
(b"8\xf9\xe9\xd0\xa0\x97\x8e\rp\xe9\xd6[\x8e`\x93\x81", b"\xf7\x16\"\xaa\x86\\b\x9d\x0c\x17\"\x81\xeea\x03\x19"),
(b")\xfb\x0c\xc7-D\xd8\xa4\xf7\x8d%\x02\x06\xa1\xbf\x94", b"\xc8s!s\xe2n\xee\xb0\x1eC\x15\xc0\xb4d\x08\xa7"),
(b"\x7f\xa6\xdc\xf4\x9e\xe3\xda\xd3\x8a\xcb\xfc\x98,1\\\xe5", b"\x82,\xfbKL_\xe9\xc9L`\xc7\"&3#\xb5"),
(b"&\x98,\x88\xff\x0e\xca[\xcb\xc6t;\xbc5Y\"", b"\x1b\x8bA\xd1\x8a\x9e\xe6\xce\xc4#C\x89\xe9Uon"),
(b"\x1c\x1b\xac2\x86\xd6\x9a\xf9\xdd\x8d=\xa3\xff\x9a\xe1\xa5", b"\xed\x10\xed\xc1\xfe\xec\xd3\x0b\x8b\xe9\xd4?H\"\xd6\x8f"),
(b"9\x8f3\x8b\x02\xb7\xe7\xe5\xa1TO\x1a\xb6N\x0b\xb6", b"\xb4\x0f\xa7 \xc7\x83\x81\x18\x9cU\xfd2\x93\xb8(\xe9"),
(b"\x8d\xbe\xcf`\xc3d\xde\xca\xf4\n.\x92Mrw\xa6", b";\t\xfb\x19eKt0\x8foG\xcd\xd5\xe0\xf6\x9d"),
(b"d\xbcG)\xec\xc8\x04 \xdd\xd9\xf6\xd6\x91/\xb2\xd3", b"2\xba\xa8\x8a\xc5\xd6h\xab\x9c\x9e\x8b\x1b5\xcc_."),
(b"\x98\xea\xf0\xcbJ-c\xea\x81\xcf?B\x17\xcb\x99\xf9", b"\x82\x05\x14\xf2\x05q\x97\xf7Z-B\xad\xa2\x16\xa0`"),
(b"\x1c\x94A\xfd\xe1\xf2\x06\x8b\x19\x80\xfa\x01g\xc3V\xaf", b"\x1c\xde\xec\xdf&\x93u\xab\xf9\x12\xfb\x04\xbc0\x99d"),
(b"6\xfa\x993b\t\x12E\xd2t\xde\xb6\xfa\x98Im", b",\xa2\xf7VB\x81\x048\xc8\xbf3\xce\x8d\xecn\x82"),
(b"L\xd6\x93\xbe\x02\xbeT\xa1\x8d\x88\x84\x18\xc7\xcf\x9fE", b"\x16i\"\xb7\x81\xef7\x7f\xdb\x7fY\xbft\x1b\xac\xd0"),
(b"\x90\xd6\xe7>Z\x8b\x83\x14\xab\xefH\xa1\x95\xe6\x8e\xc3", b"\xebJ\xfcXd\xd75I@\x12\xe9\x88\xdcG\x15\xb7"),
(b"\xb8\xd6\xd03j\xed\xf9\xa0\xb1L3\x8d\x89P\xb4\xe5", b"\xe3\xdf)CR\x03s\x19\x8f\xb5\xc6\xf5\xee&W\xee"),
(b"cx\xd7dQ\xc3)YR\x9c7\xf0\xb4\xe4P\xfd", b"h\xa8\x122\xceV\x8c\xf4\x12\x14J\x08\x91\xcf\x93\xfa"),
(b"\x1d\x14\x86\xccTi\xdd\xc3L!\x97\x92\xb3\xb7X$", b"t\x84\xa4\xe72\xa5X\xe9Ta\x03\x0c\xb00\x07\x83"),
(b"\xa6\xdd\x1f\xfc\xca2\x88\x99\xbeA\x9a\xa6JVJ\x84", b"B\x81\x8e-K\xde\x15\xfd\t\"\x08\x04@/-\xc9"),
(b"]b\x18\xe65C\x17*\x103J?\xe79!\x9b", b"\n\xbb\x1f\x98\nZZ\x97\xe0\x12\xf6W\xc4\x1e_~"),
(b"\x9a\x9f-c\t\x14\xa4NL\xc5\xc0\x99R\x15\x94\xca", b"\xa0\xf9\x11\xfa\xd7\xf3\xb5\x11\xb2SK\xe8\xdb\x06\x94\x1f"),
(b"!\xf0\x90\xd8t_O\xc1\xb5\xfe}\xca\xa6\x15}\x19", b"\xc9+\x89a9y3\xe0\xffh\xedmMq\x13\xcc"),
(b"`\xf1\xab\x07A\xe9\xe5\x805\xf2\xabD8\xce\x85A", b"\x9d\x0f\x91\xc2\x01\x93\x8fm\xd6\x03XZl\t\x15J"),
(b")\r\xcf\xc0\xa1\xa9\xf7i\x94\xa9\x8d\xed\xea\xcd\xb0B", b"jq\xe9\x01*H\x14\xa7M\x8c\xd2\xa5\x19\\\x80\x12"),
(b"vJ\x98\xbf\xd5D\x15\xdb\xa1\xd7\xdc\xaf\xa0\xaa\xf7\xc1", b"\x06\xd8-\x05\xfe\x172W\xa4\xab\xe4\x97_h\xe6\xac"),
(b"\xc6'P\xd2\xfcq\xcb\n\xe2\xc1sN>\xbc\x9a\x08", b"\xacysx4z\x96\x8f\xf1\xa5M\x9e\xe7f\\\x16"),
(b"\x00\xc6\x81\xf0\xf1\xcb\xa8_\x1fF\x18\xe9\xb0\x95\xa3\xab", b"N\xd8\xdaz\xe8F4\xb1\x88>\xec\xd6[\x15\xd7\xfd"),
(b"E\x1ee\x8f\xd5\x0f\x19\xa8\xb6~-\xe0n\xe7\x89\xe1", b"\x83\x90\xa0\x93\xec\x086+\xcf\x08\x9c\x048\x85\x88^"),
(b"\x82\x13\xfa\x9f 6\xd3'\xd7\xd2\xc1\xf9\x03.\x13*", b"J\xe2\xf8\x1e\xec\xeb\xd9\"q\xbb:\x04\xc2\xdf<\xa8"),
(b"\x01\xdf\xfcPW\xc80&{\xfe\x99\xf8\x10\xc6R\xce", b"=Cnh3\x9drKE\x1aU\x8e\xcc\x84jS"),
(b"<\xeeo[J\xb0r0N\x07\xf4\x1e\xfb\x16\xac\x07", b"\xf9KP\xfe\xd2\xed\xc7`\xc7\x0c\x9et\xd3^\xc9g"),
(b"P\xa1b#\x07V\xb0s\x86S\x9a\x1d[\xd9\xe7\x92", b"r&\xb1a\x0ff2\xd2\x8ft\x13\xff\xba\x0fy\xf4"),
(b"\xc8t\xcc\x96V\x8bS\xde\x98\xf1\xa33\xa9\xdb\x1b\x85", b"13\xf1&\x8c\xfe3\xf9\xca\\\xc6\xdb\x1dT\x04b"),
(b"\xb9u\xf2O\xfb\x05\xb3\xc2r*\x17d\\\x8f9M", b"\xeeQ\xf9&\xc5\xc8\xc8\xe6B\x1d\xcf\x184\x19]b"),
(b"\x96\xe6\xc9\x1e;\x83e4\xb0\xbe\xf6\x94\x03\x06\x85:", b"\xa2\xb0\nS\xaf\xads.\x94\x16s\x901\xaf-\xe4"),
(b"&\x81t\x048(\xd7 \xb6\x1aR\xaa\x98\xdbN\x0b", b"\x9cd\x8a#\xdd \xdcS\x14\x1e\x1b\xea\x87\x1b\xa41"),
(b"\x1fC\x0cr,\x1d\xab\xa8\xd9\xfc\xa1\xa2\xbb\xcdw'", b"D\xb6)\x7f\xd7\xe6\xb2@\x18D\x1d\x08c\xce2k"),
(b"$\xc1\xabD\r\xe0a\x0co\x12\x17\xbdB\x06\xc9y", b"s\xd8\xf8\x05\x81\xfeY\xc4\xc8|\xe5\xe2]U\xfa"),
(b"\x8f\xe3\xd6\xc9d\xc4\xf0\xd0\xe3\x17\x12\x82\xb2\xfd\xe9\xef", b"\xf1\r/9\x97/\xd4\xf6dz\x89\x7fEG/\x15"),
(b"\x16\xf2\xd8\x9a\x029\x9b{\x02\xf4u\x08~\x80\xb9Z", b"\xe1+:9\xaa\x87:\xf0\x02\xdd\xe5}G\x0ch\x98"),
(b"\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO", b"\x13\xd0\x1c[\xe6\x8aa\xd6\xa0a\xb1X\x84\xb8\xde2"),
(b"\xa8\xaa\xa8\xe7\xcal>\xe6\xdb\x18vL\xdak%}", b"\xe5W'\x83\xbd\xf2\x01\x9b\x89\x98l\xfd\xcf\x02\x84\x13"),
(b"6\x9b\xae\x07\xe0+\xa0\xcd\xdbT\t\x02@\x81$E", b"\x10Q\x1an\xaa\xe7\x9cy\x04t\x16\x1d\xea\x9d\xd2\xe6"),
(b"ne\x83\xb0\xe2\x8aG\xda\xaf%\x93cg\x84\xaf\xea", b"p!}\x8aQ(C\x08\xaf\x8bi\x80\xdaw\x7f\x81"),
(b"<\xa6\x90\xb2\x03\x8f\xa4\x17$\xc7\xbe\x8a\x1e!\x01s", b"\x19\x7ftaaLUB\x1aBt\xf5U~\xeeY"),
(b"\xe4|z\xaer8\xa0\xa3\xc8\x9c\x8d\x03`\xb8rN", b"b\xda\xf8e\n\x1f\xc1\xb2\x97\x13\xd7\x91\x91\x86\xbe\x13"),
(b"\x00\x84\xc1\xd8\xe0ub\x1a\xc0\xa1\xfe\x08K\xfb>,", b"g\xb06$\xbb{s\xb9-\x8e5h|+\xf8\xb1"),
(b"(\xac{\xb1^OOy\xc98\xf5\xc5\xc2\xd3\xb3\x1e", b"\x8d\xfc\x0f\xda\xd5\xa6\x89)\x92\x173\x1e)*\xf4&"),
(b"G*.\xe2\xd8\xee\x87\xfa\x83~eS\xcc\x1e\xdeQ", b"\xc9y \x1a]Y\x8d\xf8\xa0\xd0^p\xc1\xf1\xa4\xd2"),
(b"\x16\x91\xd7[\xbeC\x80T\x08\x0f\xbd\xe1\xf7\xbd\x1b\xb8", b"\xcc\x8c\xfb\x1f\"\x16\xa7/\xe2inK\xaa\xdf\xc0\xe4"),
(b"\x1d\xbc\xf2\x9b\xe90\x13\xc9\tuuk\r\xe4\xa9\xfa", b"\xf6\x97\xf2B\xaa\x83\xf9k\x15K\xa0\xb9\xdf\xf4,\x02"),
(b"hIA(;\x8b\x92G!K\xca\x1a\xfd\x8c9\x95", b"0!V\xfc\xfb\xa1,R\xc6\xdaO\x9c\x16\xa8<\xe2"),
(b"?\xd8\xd7e\x88\xad\xcd\xd6\x8d\\\x1e\xc2l$;\xd8", b"\x88\x8e\x86'#f\xcf\xe3,\x96&\r\xb1\xee@g"),
(b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0", b"\x99\xbf\xf9qo\xde\x92xge\x04\x84\xb8 \xfb\xe3"),
(b"\xa7\x8d\x83\xc8\xa3\xc15\xdc`\x82\xed\xae.\xea\xf2\xea", b"[\xe8{\r\xd6g\xcf\xe8L\x16\xde\xcd\x90M\xbd\x9e"),
(b"\x1a\x1b;\xa62\x10i\xb2\x19n\xc1\xddKz\xb4\xd8", b";On\xd2N\xef%B\x05_T9)IG\xcf"),
(b"@g`\x1e\xc1\xdf\x14A\xc1\x04N\xb6]\x97\xd3\x17", b"\xbc6\xcb\xaah*\xf1\xd0_\xd9B\x94F\xa8\xd8\r"),
(b"<\x86\xa4z\xb4\xaeS\x90\xa9\xb5I\xe8\xe6FI\x8c", b"\r2\xd2.H\"\xfb\x9d}{\x11^\xdf\xcb\x186"),
(b"\xf0\xb8J|\x0b\xfe\xcd\x8b\xc0$&\"8\xffob", b"\xa8}\xf31\xdd\x07\xc6\xa0\xbb\x9f0\xca\xe3&^\xf6"),
(b"r\x9b\x81\xa0D\x94\x80\xfdu\xb1O\xbc\xf6\x1a\xd8\xe0", b"\xd4)\xb6\xdbQ\xa5#\x16\xed9\xadM\xf82%\xeb"),
(b"\xb9V\xb7R\xb9\xcdJ9c\x1a`V\xb9\xb0oX", b"\x9a\xff]\xb3\x83\x96\xf5\r\x98\xe2\xc5\x86\xa21k%"),
(b"J\xe9\xaa\x8f\x07\xab\x90\x18'\xa6\x85\x08\x18\xa8\x96?", b"\xa7\x83\xed\xd9\t\xd8\x85\xe4\xf8\xcb0\x95n\xf2`\xa8"),
(b"\xc3\xaf\xc8m\xfc\x90}=\xb8\x1fr\xab}\r\xc1!", b"\x04\xa0sq\xf7\xd3Dm\xb9\xb8\x9c\xbap(\xa1L"),
(b"\x97bq\x0f\x93\xb5\xa1\xa1\xf58\xdcu5\x9a\xbd\x8b", b"\x07'\xc0\xdb/0\xd9H\x81\x06\xe2^ `F\xa9"),
(b"Nn\xd4\xbd\x13dN\x9e\xcd\xd4\x06kk~\xf5\x7f", b"\x98j'}\xc5\xb0\x1d\x19\xe2/\x92'\x9e\xd4\xba("),
(b"\x08ty\x84\xde>\x0f5jL6\x05\xe1\x02pH", b"0\xb9\xd5\xd40u\x04\xfdK\xe2D\xbc\xddy\xd4\x11"),
]
code00.py:
#!/usr/bin/env python
import gc
import random
import sys
from pprint import pprint as pp
import _example as ex
from data import data
def test(repeat):
tmp = data * repeat
if ex.size() < 4000000:
for idx, it in enumerate(tmp):
t = f"dummy content {idx}"
ex.add_node(it[0], t.encode(), random.randint(0, 10000))
print(f"List (internal) size: {ex.size()}")
print(f"Input data len: {len(data)}")
search_feat = data[0][0]
t = b"\xd3 "
print("Start search:")
res = ex.search(search_feat, t, random.randint(0, 10000), 0.1)
pp(res)
print(f"GC len: {len(gc.get_objects())}")
def main(*argv):
test(1)
print(f"List (internal) size: {ex.size()}")
print(f"Freed {ex.cleanup()} elements")
print(f"List (internal) size: {ex.size()}")
print(f"GC len: {len(gc.get_objects())}")
if __name__ == "__main__":
print("Python {:s} {:03d}bit on {:s}\n".format(" ".join(elem.strip() for elem in sys.version.split("\n")),
64 if sys.maxsize > 0x100000000 else 32, sys.platform))
rc = main(*sys.argv[1:])
print("\nDone.\n")
sys.exit(rc)
Outputs:
Win (C .dll called from a C .exe):
[cfati@CFATI-5510-0:e:\Work\Dev\StackExchange\StackOverflow\q076229579]> sopr.bat ### Set shorter prompt to better fit when pasted in StackOverflow (or other) pages ### [prompt]> "c:\Install\pc032\Microsoft\VisualStudioCommunity\2019\VC\Auxiliary\Build\vcvarsall.bat" x64 > nul [prompt]> [prompt]> dir /b code00.py data.py dll00.c dll00.h example.c main00.c orig setup.py [prompt]> [prompt]> cl /nologo /MD /DDLL dll00.c /link /NOLOGO /DLL /OUT:dll00.dll dll00.c Creating library dll00.lib and object dll00.exp [prompt]> cl /nologo /MD /W0 main00.c /link /NOLOGO /OUT:win_main00.exe dll00.lib main00.c [prompt]> del *.obj *.exp [prompt]> dir /b code00.py data.py dll00.c dll00.dll dll00.h dll00.lib example.c main00.c orig setup.py win_main00.exe [prompt]> [prompt]> win_main00.exe Element count: 0 Add node (0) returned: 0 Add node (1) returned: 0 Element count: 2 Search for 'xxx' ... Found data (0 items): Search for '111' ... Found data (1 items): Index: 0 Score: 0.300 Id: 2, Features length: 3, Content length: 7 Feats: 1? Cnt: 567 Search for 'xaa' ... Found data (1 items): Index: 0 Score: 0.300 Id: 3, Features length: 4, Content length: 9 Feats: ab Cnt: d4567 Search for '1bd' ... Found data (2 items): Index: 0 Score: 0.300 Id: 2, Features length: 3, Content length: 7 Feats: 1? Cnt: 567 Index: 1 Score: 0.300 Id: 3, Features length: 4, Content length: 9 Feats: ab Cnt: d4567 Freed 2 nodes Element count: 0 Done.
Nix (Python module):
(py_pc064_03.10_test0) [cfati@cfati-5510-0:/mnt/e/Work/Dev/StackExchange/StackOverflow/q076229579]> ~/sopr.sh ### Set shorter prompt to better fit when pasted in StackOverflow (or other) pages ### [064bit prompt]> [064bit prompt]> ls code00.py data.py dll00.c dll00.dll dll00.h dll00.lib example.c main00.c orig setup.py win_main00.exe [064bit prompt]> [064bit prompt]> python setup.py build [064bit prompt]> [064bit prompt]> ls build code00.py data.py dll00.c dll00.dll dll00.h dll00.lib example.c main00.c orig setup.py win_main00.exe [064bit prompt]> ls build/lib.linux-x86_64-cpython-310/ _example.cpython-310-x86_64-linux-gnu.so [064bit prompt]> [064bit prompt]> PYTHONPATH=${PYTHONPATH}:build/lib.linux-x86_64-cpython-310 python code00.py Python 3.10.11 (main, Apr 5 2023, 14:15:10) [GCC 9.4.0] 064bit on linux List (internal) size: 100 Input data len: 100 Start search: [(4555, b'dummy content 0', b'\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2', 0.3), (273, b'dummy content 74', b'\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO', 0.3), (9201, b'dummy content 87', b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0", 0.3)] GC len: 8942 List (internal) size: 100 Freed 100 elements List (internal) size: 0 GC len: 8937 Done.