I'm attempting to send a CSV file with a large number of columns to a MySQL database. Upon running my code I am met with this error message:
Traceback (most recent call last):
File "/home/dan-brown/apollo/src/cicflowmeter/datbase_handling.py", line 24, in <module>
db_post()
File "/home/dan-brown/apollo/src/cicflowmeter/datbase_handling.py", line 22, in db_post
mycursor.execute(query, (row,))
File "/home/dan-brown/PythonVenv/my-venv/lib/python3.12/site-packages/mysql/connector/cursor_cext.py", line 350, in execute
stmt = RE_PY_PARAM.sub(psub, stmt)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/dan-brown/PythonVenv/my-venv/lib/python3.12/site-packages/mysql/connector/cursor_cext.py", line 115, in __call__
raise ProgrammingError( mysql.connector.errors.ProgrammingError: Not enough parameters for the SQL statement
My understanding is that this error means that the number of placeholders I have written in the query does not match the number of values read from the CSV file, but I have double-checked this numerous times only to find the same 84 features. Here is the code:
import csv

import mysql.connector
#connect to database
db = mysql.connector.connect(
host="localhost",
user="root",
passwd="root",
database="Apollo"
)
#active user
mycursor = db.cursor()
#setup table. Should be called on install
def db_setup():
mycursor.execute("CREATE TABLE NetworkLogs (id INT AUTO_INCREMENT PRIMARY KEY, src_ip VARCHAR(15), dst_ip VARCHAR(15), src_port INT, dst_port INT, src_mac VARCHAR(17), dst_mac VARCHAR(17), protocol INT, timestamp DATETIME, flow_duration FLOAT, flow_byts_s FLOAT, flow_pkts_s FLOAT, fwd_pkts_s FLOAT, bwd_pkts_s FLOAT, tot_fwd_pkts INT, tot_bwd_pkts INT, totlen_fwd_pkts INT, totlen_bwd_pkts INT, fwd_pkt_len_max FLOAT, fwd_pkt_len_min FLOAT, fwd_pkt_len_mean FLOAT, fwd_pkt_len_std FLOAT, bwd_pkt_len_max FLOAT, bwd_pkt_len_min FLOAT, bwd_pkt_len_mean FLOAT, bwd_pkt_len_std FLOAT, pkt_len_max FLOAT, pkt_len_min FLOAT, pkt_len_mean FLOAT, pkt_len_std FLOAT, pkt_len_var FLOAT, fwd_header_len INT, bwd_header_len INT, fwd_seg_size_min INT, fwd_act_data_pkts INT, flow_iat_mean FLOAT, flow_iat_max FLOAT, flow_iat_min FLOAT, flow_iat_std FLOAT, fwd_iat_tot FLOAT, fwd_iat_max FLOAT, fwd_iat_min FLOAT, fwd_iat_mean FLOAT, fwd_iat_std FLOAT, bwd_iat_tot FLOAT, bwd_iat_max FLOAT, bwd_iat_min FLOAT, bwd_iat_mean FLOAT, bwd_iat_std FLOAT, fwd_psh_flags INT, bwd_psh_flags INT, fwd_urg_flags INT, bwd_urg_flags INT, fin_flag_cnt INT, syn_flag_cnt INT, rst_flag_cnt INT, psh_flag_cnt INT, ack_flag_cnt INT, urg_flag_cnt INT, ece_flag_cnt INT, down_up_ratio FLOAT, pkt_size_avg FLOAT, init_fwd_win_byts INT, init_bwd_win_byts INT, active_max FLOAT, active_min FLOAT, active_mean FLOAT, active_std FLOAT, idle_max FLOAT, idle_min FLOAT, idle_mean FLOAT, idle_std FLOAT, fwd_byts_b_avg FLOAT, fwd_pkts_b_avg FLOAT, bwd_byts_b_avg FLOAT, bwd_pkts_b_avg FLOAT, fwd_blk_rate_avg FLOAT, bwd_blk_rate_avg FLOAT, fwd_seg_size_avg FLOAT, bwd_seg_size_avg FLOAT, cwe_flag_count INT, subflow_fwd_pkts INT, subflow_bwd_pkts INT, subflow_fwd_byts INT, subflow_bwd_byts INT)")
#inserts recorded flows to db
def db_post():
with open('src/cicflowmeter/test1.csv', 'r') as f:
for row in f:
query = "INSERT INTO NetworkLogs (src_ip, dst_ip, src_port, dst_port, src_mac, dst_mac, protocol, timestamp, flow_duration, flow_byts_s, flow_pkts_s, fwd_pkts_s, bwd_pkts_s, tot_fwd_pkts, tot_bwd_pkts, totlen_fwd_pkts, totlen_bwd_pkts, fwd_pkt_len_max, fwd_pkt_len_min, fwd_pkt_len_mean, fwd_pkt_len_std, bwd_pkt_len_max, bwd_pkt_len_min, bwd_pkt_len_mean, bwd_pkt_len_std, pkt_len_max, pkt_len_min, pkt_len_mean, pkt_len_std, pkt_len_var, fwd_header_len, bwd_header_len, fwd_seg_size_min, fwd_act_data_pkts, flow_iat_mean, flow_iat_max, flow_iat_min, flow_iat_std, fwd_iat_tot, fwd_iat_max, fwd_iat_min, fwd_iat_mean, fwd_iat_std, bwd_iat_tot, bwd_iat_max, bwd_iat_min, bwd_iat_mean, bwd_iat_std, fwd_psh_flags, bwd_psh_flags, fwd_urg_flags, bwd_urg_flags, fin_flag_cnt, syn_flag_cnt, rst_flag_cnt, psh_flag_cnt, ack_flag_cnt, urg_flag_cnt, ece_flag_cnt, down_up_ratio, pkt_size_avg, init_fwd_win_byts, init_bwd_win_byts, active_max, active_min, active_mean, active_std, idle_max, idle_min, idle_mean, idle_std, fwd_byts_b_avg, fwd_pkts_b_avg, bwd_byts_b_avg, bwd_pkts_b_avg, fwd_blk_rate_avg, bwd_blk_rate_avg, fwd_seg_size_avg, bwd_seg_size_avg, cwe_flag_count, subflow_fwd_pkts, subflow_bwd_pkts, subflow_fwd_byts, subflow_bwd_byts) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')"
mycursor.execute(query, (row,))
db.commit()
db_post()
And this is the CSV file that is being read:
192.168.1.16,192.168.1.255,59222,15600,64:e7:d8:33:f6:76,ff:ff:ff:ff:ff:ff,17,2024-10-05 18:07:59,0.0,0,0,0,0,1,0,77,0,77.0,77.0,77.0,0.0,0.0,0.0,0.0,0.0,77,77,77.0,0.0,0.0,8,0,8,1,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,0,0,0,0.0,77.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,77.0,0.0,0,1,0,77,0
I have read through multiple forum posts but have yet to find an answer.
I apologize if this is laid out incorrectly or is a simple mistake. I am not an experienced programmer, any help would be greatly appreciated.
Thank you.
There are multiple problems:

1. You should use csv.reader() to parse the file; iterating the file object directly gives you each whole line as a single string, not a list of fields.
2. You're passing row in a tuple, so it's just a single parameter value. Just use row by itself.
3. The %s placeholders in the SQL should not be quoted. Quoting them makes them literal strings, and they won't be replaced with the parameter values.

#inserts recorded flows to db
def db_post():
query = "INSERT INTO NetworkLogs (src_ip, dst_ip, src_port, dst_port, src_mac, dst_mac, protocol, timestamp, flow_duration, flow_byts_s, flow_pkts_s, fwd_pkts_s, bwd_pkts_s, tot_fwd_pkts, tot_bwd_pkts, totlen_fwd_pkts, totlen_bwd_pkts, fwd_pkt_len_max, fwd_pkt_len_min, fwd_pkt_len_mean, fwd_pkt_len_std, bwd_pkt_len_max, bwd_pkt_len_min, bwd_pkt_len_mean, bwd_pkt_len_std, pkt_len_max, pkt_len_min, pkt_len_mean, pkt_len_std, pkt_len_var, fwd_header_len, bwd_header_len, fwd_seg_size_min, fwd_act_data_pkts, flow_iat_mean, flow_iat_max, flow_iat_min, flow_iat_std, fwd_iat_tot, fwd_iat_max, fwd_iat_min, fwd_iat_mean, fwd_iat_std, bwd_iat_tot, bwd_iat_max, bwd_iat_min, bwd_iat_mean, bwd_iat_std, fwd_psh_flags, bwd_psh_flags, fwd_urg_flags, bwd_urg_flags, fin_flag_cnt, syn_flag_cnt, rst_flag_cnt, psh_flag_cnt, ack_flag_cnt, urg_flag_cnt, ece_flag_cnt, down_up_ratio, pkt_size_avg, init_fwd_win_byts, init_bwd_win_byts, active_max, active_min, active_mean, active_std, idle_max, idle_min, idle_mean, idle_std, fwd_byts_b_avg, fwd_pkts_b_avg, bwd_byts_b_avg, bwd_pkts_b_avg, fwd_blk_rate_avg, bwd_blk_rate_avg, fwd_seg_size_avg, bwd_seg_size_avg, cwe_flag_count, subflow_fwd_pkts, subflow_bwd_pkts, subflow_fwd_byts, subflow_bwd_byts) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" with open('src/cicflowmeter/test1.csv', 'r') as f:
csv_f = csv.reader(f)
for row in csv_f:
mycursor.execute(query, row)
db.commit()
There's also no need to re-assign query each time through the loop; it never changes.

You can also do this more efficiently using mycursor.executemany(). It takes a 2-dimensional iterable and does the looping itself, and it will batch up multiple rows to reduce the number of queries sent to the DB.
#inserts recorded flows to db
def db_post():
query = "INSERT INTO NetworkLogs (src_ip, dst_ip, src_port, dst_port, src_mac, dst_mac, protocol, timestamp, flow_duration, flow_byts_s, flow_pkts_s, fwd_pkts_s, bwd_pkts_s, tot_fwd_pkts, tot_bwd_pkts, totlen_fwd_pkts, totlen_bwd_pkts, fwd_pkt_len_max, fwd_pkt_len_min, fwd_pkt_len_mean, fwd_pkt_len_std, bwd_pkt_len_max, bwd_pkt_len_min, bwd_pkt_len_mean, bwd_pkt_len_std, pkt_len_max, pkt_len_min, pkt_len_mean, pkt_len_std, pkt_len_var, fwd_header_len, bwd_header_len, fwd_seg_size_min, fwd_act_data_pkts, flow_iat_mean, flow_iat_max, flow_iat_min, flow_iat_std, fwd_iat_tot, fwd_iat_max, fwd_iat_min, fwd_iat_mean, fwd_iat_std, bwd_iat_tot, bwd_iat_max, bwd_iat_min, bwd_iat_mean, bwd_iat_std, fwd_psh_flags, bwd_psh_flags, fwd_urg_flags, bwd_urg_flags, fin_flag_cnt, syn_flag_cnt, rst_flag_cnt, psh_flag_cnt, ack_flag_cnt, urg_flag_cnt, ece_flag_cnt, down_up_ratio, pkt_size_avg, init_fwd_win_byts, init_bwd_win_byts, active_max, active_min, active_mean, active_std, idle_max, idle_min, idle_mean, idle_std, fwd_byts_b_avg, fwd_pkts_b_avg, bwd_byts_b_avg, bwd_pkts_b_avg, fwd_blk_rate_avg, bwd_blk_rate_avg, fwd_seg_size_avg, bwd_seg_size_avg, cwe_flag_count, subflow_fwd_pkts, subflow_bwd_pkts, subflow_fwd_byts, subflow_bwd_byts) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
with open('src/cicflowmeter/test1.csv', 'r') as f:
csv_f = csv.reader(f)
mycursor.executemany(query, csv_f)
db.commit()