numpy, csv, structured-array

Numpy savetxt Structured Array ValueError: fmt has wrong number of % formats


import numpy as np

# Reproduction script for the savetxt ValueError quoted below.
# row_a holds 16 strings (two of them empty); row_b holds 16 floats.
row_a = ['0.01722497', '', '0.09496404', '0.03654174', '0.03624997', '0.01583785', '0.02002064', '0.13934049', '0.0405615', '0.05686177', '', '0.08495372', '0.00619173', '0.00515492', '0.01053369', '0.06576333']
row_b = [0.04871661, 0.1122536, 0.20836956, 0.05473605, 0.02344445, 0.01739371, 0.00524003, 0.0640286, 0.02766152, 0.02442267, 0.04183814, 0.04853815, 0.01682549, 0.00263045, 0.00819199, 0.1631007]
# Structured dtype with two fields: a unicode string field and a float field.
dt = np.dtype([('col_1', 'U32'), ('col_2', float)])
# NOTE(review): the array is allocated 2-D, with shape (2, len(row_a)), not
# 1-D; the working version in the answer below uses
# np.empty(len(row_a), dtype=dt) instead.
arr = np.empty((2, len(row_a)), dtype=dt)
# Fill each field from the corresponding list.
arr['col_1'] = row_a
arr['col_2'] = row_b
# This call raises "ValueError: fmt has wrong number of % formats".
# NOTE(review): presumably savetxt takes the column count from the second
# axis of the 2-D array (16 here) rather than from the 2 dtype fields, so the
# two-specifier fmt does not match — confirm against the numpy.savetxt docs.
np.savetxt('table.csv', arr, delimiter=',', header='col_1,col_2', fmt='%s %f')

The code above (which is supposed to create a structured array out of a str array and a float array and write it to a CSV file) gives me the following error, even though I have 2 arrays of the same length, 2 columns and 2 formats:

ValueError: fmt has wrong number of % formats:  %s %f

Solution

  • Making a 1d structured array (as per my comment):

    In [423]: row_a = ['0.01722497', '', '0.09496404', '0.03654174', '0.03624997', '0.01583785', '0
         ...: .02002064', '0.13934049', '0.0405615', '0.05686177', '', '0.08495372', '0.00619173',
         ...: '0.00515492', '0.01053369', '0.06576333']
         ...: row_b = [0.04871661, 0.1122536, 0.20836956, 0.05473605, 0.02344445, 0.01739371, 0.005
         ...: 24003, 0.0640286, 0.02766152, 0.02442267, 0.04183814, 0.04853815, 0.01682549, 0.00263
         ...: 045, 0.00819199, 0.1631007]
         ...: dt = np.dtype([('col_1', 'U32'), ('col_2', float)])
         ...: arr = np.empty(len(row_a), dtype=dt)
         ...: arr['col_1'] = row_a
         ...: arr['col_2'] = row_b
    In [424]: 
    In [424]: arr
    Out[424]: 
    array([('0.01722497', 0.04871661), ('', 0.1122536 ),
           ('0.09496404', 0.20836956), ('0.03654174', 0.05473605),
           ('0.03624997', 0.02344445), ('0.01583785', 0.01739371),
           ('0.02002064', 0.00524003), ('0.13934049', 0.0640286 ),
           ('0.0405615', 0.02766152), ('0.05686177', 0.02442267),
           ('', 0.04183814), ('0.08495372', 0.04853815),
           ('0.00619173', 0.01682549), ('0.00515492', 0.00263045),
           ('0.01053369', 0.00819199), ('0.06576333', 0.1631007 )],
          dtype=[('col_1', '<U32'), ('col_2', '<f8')])
    In [425]: arr.shape
    Out[425]: (16,)
    

    And the save:

    In [426]: np.savetxt('table.csv', arr, delimiter=',', header='col_1,col_2', fmt='%s %f')
    In [427]: cat table.csv
    # col_1,col_2
    0.01722497 0.048717
     0.112254
    0.09496404 0.208370
    0.03654174 0.054736
    ...
    

    The linked SO question that I answered before had a more complex dtype. This is a simple two-field case, so it doesn't need special handling.

    The `''` (empty string) values might cause problems when loading the file. I'd suggest at least putting a delimiter such as `,` in the format string, so the loader can treat them as missing values.

    In [428]: np.savetxt('table.csv', arr, delimiter=',', header='col_1,col_2', fmt='%s, %f')
    In [429]: cat table.csv
    # col_1,col_2
    0.01722497, 0.048717
    , 0.112254
    0.09496404, 0.208370
    0.03654174, 0.054736
    ...
    In [430]: np.genfromtxt('table.csv', dtype=None, names=True, delimiter=',')
    Out[430]: 
    array([(0.01722497, 0.048717), (       nan, 0.112254),
           (0.09496404, 0.20837 ), (0.03654174, 0.054736),
           (0.03624997, 0.023444), (0.01583785, 0.017394),
    
    In [431]: np.genfromtxt('table.csv', dtype=arr.dtype, names=True, delimiter=',')
    Out[431]: 
    array([('0.01722497', 0.048717), ('', 0.112254), ('0.09496404', 0.20837 ),
           ('0.03654174', 0.054736), ('0.03624997', 0.023444),
           ('0.01583785', 0.017394), ('0.02002064', 0.00524 ),