Tags: python, python-3.x, sqlite, queue

Items getting dropped while adding a list to a queue


I have a system where I am trying to achieve a single point of action for writing to a database. For this I am using Queues: I fetch a record from a queue and add it to a list. Once the list holds the required number of records, I push it onto the writer_queue, which is consumed by a different thread that writes the records to the database (SQLite in this case).

However, when I add the list to the writer_queue, it seems that not all of the records in the list make it across, which leads to unwanted gaps in the final table. I am not sure what is causing this.

Below is the code I am dealing with:

import os
import time
import sqlite3

from queue import Queue
from pydantic import BaseModel
from typing import List, Dict, Optional
from threading import Thread
from dataclasses import dataclass

class Tables(BaseModel):
    max_buffer_length: int = 1000
    rt_table_name: str = ''
    query_schemas: Dict = {
        'RawTable': ['Time', 'Position', 'RPM', 'Flow',
                     'Density', 'Pressure', 'Tension', 'Torque', 'Weight'],
    }
    table_schemas: Dict = {
        'RawTable': ['time', 'position', 'rpm', 'flow',
                     'density', 'pressure', 'tension', 'torque', 'weight'],
    }

    def insert_data(self,
                    buffer: Optional[Queue] = None,
                    db_path: Optional[str] = None,
                    writer_queue: Optional[Queue] = None):

        conn = sqlite3.connect(db_path)
        
        table_name = self.rt_table_name

        cursor = conn.cursor()

        try:
            if buffer:
                self.insert_batch_data(buffer, table_name, writer_queue)

        except sqlite3.Error as e:
            print(f"An error occurred: {e}")
            conn.rollback()
        finally:
            conn.commit()
            cursor.close()
            conn.close()

    def insert_batch_data(self, 
                          buffer: Queue,
                          table_name: str, 
                          writer_queue: Queue):
        # Insert data
        query = self.get_query(table_name)
        batch = []
        while True:
            if buffer.empty():
                time.sleep(5)
                continue

            item = buffer.get()
            if item is None:
                print("Reached Sentinal Value... Exiting thread...")
                break
            
            batch.append(item)
            if len(batch) == self.max_buffer_length:
                self.add_to_writer_queue(query, table_name, batch, writer_queue)
                print(f"Number of items added to writer_queue: {len(batch)}")
                batch.clear()

        # Insert any remaining records in the batch
        if batch:
            self.add_to_writer_queue(query, table_name, batch, writer_queue)

    def get_query(self, table_name: str) -> str:
        if not table_name:
            raise ValueError("Table name must not be empty")
        columns = self.query_schemas[table_name]
        placeholders = ', '.join(['?' for _ in columns])
        query = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
        return query

    def insert_records(self, cursor: sqlite3.Cursor, conn: sqlite3.Connection, query: str, batch: List, table_name: str):
        try:
            columns = self.table_schemas[table_name]
            data_tuples = [
                tuple(getattr(row, col) for col in columns)
                for row in batch
            ]
            cursor.executemany(query, data_tuples)
            conn.commit()
            print(f"Inserted {len(batch)} records into {table_name}")
        except sqlite3.Error as e:
            print(f"SQLite error occurred while inserting records into {table_name}: {e}")
            conn.rollback()
        except Exception as e:
            print(f"Unexpected error occurred while inserting records into {table_name}: {e}")
            conn.rollback()

    def process_db_writes(self,
                          writer_queue: Queue, 
                          db_path: str):
        try:
            conn = sqlite3.connect(db_path)
            cursor = conn.cursor()
                
            while True:
                while writer_queue.empty():
                    time.sleep(2)  # Poll every 2 seconds until new data arrives
                    
                query, table_name, data = writer_queue.get()
                assert len(data) == self.max_buffer_length, f"Expected {self.max_buffer_length} items, received: {len(data)}\n"
                self.insert_records(cursor, conn, query, data, table_name)
        except Exception as e:
            print(f"Error encountered in process_db_writes: {str(e)}")
        finally:
            cursor.close()
            conn.close()

    def add_to_writer_queue(self, 
                            query: str, 
                            table_name: str, 
                            batch: List, 
                            writer_queue: Queue):
        while writer_queue.full():
            time.sleep(1)
        assert len(batch) == self.max_buffer_length, f"Expected {self.max_buffer_length} items, received: {len(batch)}\n"
        writer_queue.put((query, table_name, batch))

@dataclass
class RawData:
    time: float 
    position: float = 0.0
    rpm: float = 0.0
    flow: float = 0.0
    density: float = 0.0
    pressure: float = 0.0
    tension: float = 0.0
    torque: float = 0.0
    weight: float = 0.0

class Raw(Tables):
    def __init__(self, **data):
        super().__init__(**data)
        self.rt_table_name = 'RawTable'

    def populate_queue(self, buffer: Queue):
        for i in range(1_000_000):
            while buffer.full():
                time.sleep(1)
            buffer.put(RawData(time=i))
    
def fetch_raw_data(db_path: str, rt_db_path: str, db_writer: Queue, max_buffer_length: int):
    try:
        conn = sqlite3.connect(db_path)

        buffer = Queue(maxsize=max_buffer_length)

        raw = Raw(max_buffer_length=max_buffer_length)
        # Starting other threads
        fetcher = Thread(target=raw.populate_queue, args=(buffer,))
        writer = Thread(target=raw.insert_data, args=(buffer, rt_db_path, db_writer))

        # Start the Threads
        fetcher.start()
        writer.start()

        # Join the threads
        fetcher.join()
        writer.join()
    except KeyboardInterrupt:
        print("Finishing threads due to keyboard interruption.")
        fetcher.join()
        writer.join()
    except Exception as e:
        print("Error encountered: ", e)
    finally:
        if conn:
            conn.close()

def get_rt_db_path(db_path: str, db_extension: str = '.RT'):
    db_dir, db_file = os.path.split(db_path)
    db_name, _ = os.path.splitext(db_file)

    if db_name.endswith('_Raw'):
        db_name = db_name[:-4]
    rt_db_name = db_name + '_' + db_extension
    return os.path.join(db_dir, rt_db_name)

def main():
    db_path = input("Enter absolute path of raw.db file: ")

    try:
        maxsize=1000
        db_writer = Queue(maxsize=maxsize)
        tables = Tables(max_buffer_length=maxsize)

        rt_db_path = get_rt_db_path(db_path)

        db_writer_thread = Thread(target=tables.process_db_writes, args=(db_writer, rt_db_path))
        # Start the db_writer_thread
        db_writer_thread.start()
        fetch_raw_data(db_path, rt_db_path, db_writer, maxsize)

        db_writer_thread.join()
    except KeyboardInterrupt:
        db_writer_thread.join()
    except Exception:
        db_writer_thread.join()

if __name__ == '__main__':
    main()

The problem seems to occur between Tables.insert_batch_data() and Tables.add_to_writer_queue(): most of the time, the length of the list passed to add_to_writer_queue() does not match the len(batch) received when the item is later pulled from the queue. I couldn't find any documented limit on the amount of data a Queue object can hold, so I am very confused about why the data is getting lost and how to ensure all of it gets from point A to point B.


Solution

  • batch.clear() also clears the list sitting in the queue before it is processed: writer_queue.put((query, table_name, batch)) stores a reference to batch, not a copy, so mutating the list afterwards mutates the queued item too. Put a copy onto the queue instead:

    writer_queue.put((query, table_name, batch.copy()))

    Alternatively, rebind instead of mutating: replace batch.clear() with batch = []. Always pass immutable data or copies when sharing mutable objects between threads to avoid side effects like this.
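
    A minimal sketch of the aliasing effect (hypothetical values; no threads or SQLite are needed to reproduce it):

    from queue import Queue

    q = Queue()

    batch = [1, 2, 3]
    q.put(batch)         # the queue stores a reference to this exact list object
    batch.clear()        # ...so clearing it also empties the queued item
    print(q.get())       # [] -- the consumer sees an empty list

    batch = [4, 5, 6]
    q.put(batch.copy())  # snapshot the current contents instead
    batch.clear()        # the queued copy is unaffected
    print(q.get())       # [4, 5, 6]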