I'm working on interpolating battery capacity data based on the relationships between `hour_rates`, `capacities` and `currents`. Here’s a sample of my data:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
# Discharge table for the Rolls S-480 flooded battery.
# Each row is [hour_rate, capacity as listed, current].
capacity_data = [
    [1, 135, 135],
    [2, 191, 95.63],
    [3, 221, 73.75],
    [4, 244, 60.94],
    [5, 263, 52.5],
    [6, 278, 46.25],
    [8, 300, 37.5],
    [10, 319, 31.88],
    [12, 334, 27.81],
    [15, 352, 23.45],
    [20, 375, 18.75],
    [24, 386, 16.09],
    [50, 438, 8.76],
    [72, 459, 6.38],
    [100, 486, 4.86],
]

# The listed capacity is kept as 'capacities_o'; 'capacities' is recomputed
# as current * hour rate (rounded to 3 decimals) so the product relation
# holds for the column used downstream.
capacity = pd.DataFrame(
    capacity_data,
    columns=['hour_rates', 'capacities_o', 'currents'],
).assign(
    capacities=lambda table: (table['currents'] * table['hour_rates']).round(3)
)
The columns relate as follows:
hour_rates (h) = capacities (Ah) / currents (A)
capacities (Ah) = hour_rates (h) * currents (A)
currents (A) = capacities (Ah) / hour_rates (h)
Objective: I want to interpolate `capacities` and `hour_rates` for a range of `currents` values, using logarithmic scaling for better accuracy.
I wrote a custom interpolation class and function to achieve this. Here’s the code:
from typing import Union
class interpolate1d(interp1d):
    """Extend scipy ``interp1d`` to interpolate/extrapolate per axis in log space.

    Parameters
    ----------
    x, y : array-like
        Sample points, forwarded to ``interp1d`` after the optional log10
        transform described below.
    xspace, yspace : {'linear', 'log'}
        Space in which each axis is interpolated. With ``'log'`` the
        corresponding samples (and, for ``xspace``, the query points) are
        passed through ``np.log10``; for ``yspace='log'`` the interpolated
        result is mapped back with ``10 ** value``.
    *args, **kwargs
        Forwarded unchanged to ``interp1d`` (for example ``kind`` or
        ``fill_value``).

    Raises
    ------
    ValueError
        If ``xspace`` or ``yspace`` is neither ``'linear'`` nor ``'log'``.
    """

    _SPACES = ('linear', 'log')

    def __init__(self, x, y, *args, xspace='linear', yspace='linear', **kwargs):
        # Reject unknown space names up front; otherwise a typo such as
        # 'Log' would silently fall back to linear interpolation.
        for option, space in (('xspace', xspace), ('yspace', yspace)):
            if space not in self._SPACES:
                raise ValueError(
                    f"{option} must be one of {self._SPACES}, got {space!r}"
                )
        self.xspace = xspace
        self.yspace = yspace
        if xspace == 'log':
            x = np.log10(x)
        if yspace == 'log':
            y = np.log10(y)
        super().__init__(x, y, *args, **kwargs)

    def __call__(self, x, *args, **kwargs):
        """Evaluate the interpolant at ``x`` (given in the original, untransformed units)."""
        if self.xspace == 'log':
            x = np.log10(x)
        values = super().__call__(x, *args, **kwargs)
        if self.yspace == 'log':
            # Undo the log10 applied to the y samples at construction time.
            return 10 ** values
        return values
def interpolate_cap_by_current(df: pd.DataFrame,
                               current_values: Union[list, np.ndarray],
                               kind: Union[str, int] = 'linear',
                               hr_limit: int = 600
                               ) -> pd.DataFrame:
    """
    Interpolate Battery Capacity Values From Current list values

    Hour rate and capacity are each interpolated against current in
    log-log space (with extrapolation enabled), then cross-checked against
    each other through the product relation capacity = current * hour rate.

    Parameters
    ----------
    df:
        Table with 'currents', 'hour_rates' and 'capacities' columns.
    current_values:
        Currents at which to evaluate the interpolation. Any array-like
        (or a scalar) is accepted and converted to a 1-D NumPy array.
    kind:
        Interpolation kind forwarded to ``interpolate1d``.
    hr_limit:
        Rows whose interpolated hour rate is not below this value are dropped.

    Returns
    -------
    pd.DataFrame with one row per retained current and the columns:
        currents      - the requested currents
        hour_rates    - interpolated hour rate
        capacities    - interpolated capacity
        calc_cap      - currents * hour_rates
        calc_hr       - capacities / currents
        diff_cap      - capacities - calc_cap
        diff_hr       - hour_rates - calc_hr
        real_hr       - hour_rates - diff_hr
        real_cap      - currents * real_hr
        real_current  - real_cap / real_hr
        diff_current  - currents - real_current
    All derived columns are rounded to 3 decimals.
    """
    # Normalise the input once so plain lists and scalars work as well as
    # ndarrays (the element-wise arithmetic below needs array semantics).
    currents = np.atleast_1d(np.asarray(current_values))

    def _log_log_interpolator(column):
        # Interpolate df[column] against current with both axes in log space.
        return interpolate1d(
            df['currents'],
            df[column],
            xspace='log',
            yspace='log',
            fill_value="extrapolate",
            kind=kind
        )

    # Calculate interpolated values for the requested currents
    hour_rate_interpolated = _log_log_interpolator('hour_rates')(currents)
    capacity_interpolated = _log_log_interpolator('capacities')(currents)

    # Cross-check the two independent interpolations via the product relation
    calc_cap = np.around(currents * hour_rate_interpolated, 3)
    calc_hr = np.around(capacity_interpolated / currents, 3)
    diff_cap = np.around(capacity_interpolated - calc_cap, 3)
    diff_hr = np.around(hour_rate_interpolated - calc_hr, 3)
    real_hr = np.around(hour_rate_interpolated - diff_hr, 3)
    real_cap = np.around(currents * real_hr, 3)
    real_current = np.around(real_cap / real_hr, 3)

    result = pd.DataFrame({
        'currents': currents,
        'hour_rates': hour_rate_interpolated,
        'capacities': capacity_interpolated,
        'calc_cap': calc_cap,
        'real_cap': real_cap,
        'diff_cap': diff_cap,
        'calc_hr': calc_hr,
        'real_hr': real_hr,
        'diff_hr': diff_hr,
        'real_current': real_current,
        'diff_current': np.around(currents - real_current, 3),
    })
    # Discard rows whose interpolated hour rate reaches the configured limit
    return result[result['hour_rates'] < hr_limit]
def plot_grid(major_ticks: list,
              minor_ticks: list,
              ):
    """Enable the grid on the current axes and place major/minor X ticks.

    major_ticks: positions of the major X ticks.
    minor_ticks: positions of the minor X ticks.
    Minor grid lines are drawn fainter (alpha 0.2) than major ones (alpha 0.5).
    """
    axes = plt.gca()
    axes.grid(True)
    for positions, is_minor in ((major_ticks, False), (minor_ticks, True)):
        axes.set_xticks(positions, minor=is_minor)
    for which_lines, opacity in (('minor', 0.2), ('major', 0.5)):
        axes.grid(which=which_lines, alpha=opacity)
# Currents (A) at which the interpolation is evaluated. The source table only
# spans 4.86 A to 135 A, so values outside that range are extrapolated.
currents_list = np.array(
    [0.1, 0.2, 0.4, 0.5, 0.6, 0.8, 1, 1.5, 1.7, 2, 2.2, 2.5]
    + [3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 17, 20, 22, 25, 27, 30, 32]
    + [35, 37, 40, 60, 80, 120, 150, 180, 220, 250]
)

# `kind` options offered by scipy's interp1d: linear, nearest, nearest-up,
# zero, slinear, quadratic, cubic, previous, next.
capacities = interpolate_cap_by_current(df=capacity, current_values=currents_list, kind='quadratic')

# Current implied by the source table itself (capacity / hour rate), used as
# a reference against the tabulated current column.
rel_current = (capacity['capacities'] / capacity['hour_rates']).round(3)
# Figure 1: interpolated results overlaid on the source table, in three
# stacked panels.
plt.figure(figsize=(18, 15))

# Panel 1: capacity against hour rate.
plt.subplot(3, 1, 1)
for x_values, y_values, series_label in (
    (capacities['real_hr'], capacities['capacities'], 'Interpolated Capacitiy'),
    (capacities['real_hr'], capacities['calc_cap'], 'Calculated Capacitiy'),
    (capacities['real_hr'], capacities['real_cap'], 'Real Capacitiy'),
    (capacity['hour_rates'], capacity['capacities'], 'Capacitiy'),
):
    plt.plot(x_values, y_values, label=series_label)
plt.ylabel('Capacity (A/h)')
plt.xlabel('Hour Rate (h)')
plt.title('Battery Hour Rate / Capacity relationship')
plt.legend()
# Tick range is taken from the interpolated hour rates, padded by 10.
max_tick = capacities['hour_rates'].max() + 10
plot_grid(major_ticks=np.arange(0, max_tick, 20), minor_ticks=np.arange(0, max_tick, 5))

# Panel 2: current against hour rate (reuses the hour-rate tick range).
plt.subplot(3, 1, 2)
for x_values, y_values, series_label in (
    (capacities['real_hr'], capacities['currents'], 'Interpolated Current (A)'),
    (capacities['real_hr'], capacities['real_current'], 'Real Current (A)'),
    (capacity['hour_rates'], rel_current, 'Calculated Original Current Relation (A)'),
    (capacity['hour_rates'], capacity['currents'], 'Current (A)'),
):
    plt.plot(x_values, y_values, label=series_label)
plt.ylabel('Current (A)')
plt.xlabel('Hour Rate (h)')
plt.title('Battery Hour Rate / Current relationship')
plt.legend()
plot_grid(major_ticks=np.arange(0, max_tick, 20), minor_ticks=np.arange(0, max_tick, 5))

# Panel 3: capacity against current, both axes explicitly linear.
plt.subplot(3, 1, 3)
for x_values, y_values, series_label in (
    (capacities['currents'], capacities['capacities'], 'Interpolated capacity / current'),
    (capacities['currents'], capacities['calc_cap'], 'Calculated capacity / current'),
    (capacity['currents'], capacity['capacities'], 'capacity / current'),
):
    plt.plot(x_values, y_values, label=series_label)
plt.ylabel('Capacity (A/h)')
plt.xlabel('Current (A)')
plt.title('Battery Current / Capacity relationship')
plt.xscale('linear')
plt.yscale('linear')
plt.legend()
# This panel's X axis is current, so the tick range is recomputed from it.
max_tick = capacities['currents'].max() + 10
plot_grid(major_ticks=np.arange(0, max_tick, 20), minor_ticks=np.arange(0, max_tick, 5))
Even though I've configured the interpolation in logarithmic space, the interpolated values still don’t match the calculated values when verified against the relationships provided. I’ve illustrated this discrepancy in the plots below, where I calculate the difference by applying the original relationships to the interpolated results.
# Figure 2: capacity and hour-rate discrepancies ('diff_cap', 'diff_hr')
# plotted against hour rate, capacity and current in turn.
plt.figure(figsize=(18, 15))
for panel_number, (x_column, x_label, panel_title) in enumerate(
    (
        ('hour_rates', 'Hour Rate (h)', 'Interpolation Data Relationship By Hour Rate'),
        ('capacities', 'Capacity (A/h)', 'Interpolation Data Relationship By Capacity'),
        ('currents', 'Current (A)', 'Interpolation Data Relationship By Current'),
    ),
    start=1,
):
    plt.subplot(3, 1, panel_number)
    for diff_column, diff_label in (
        ('diff_cap', 'Diff Capacity'),
        ('diff_hr', 'Diff Hour Rate'),
    ):
        plt.plot(capacities[x_column], capacities[diff_column], label=diff_label)
    plt.ylabel('Diff Interpolated / Calculated')
    plt.xlabel(x_label)
    plt.title(panel_title)
    plt.legend()
    # Tick range follows whichever quantity is on this panel's X axis.
    max_tick = capacities[x_column].max() + 10
    plot_grid(major_ticks=np.arange(0, max_tick, 20), minor_ticks=np.arange(0, max_tick, 5))
Is there a way to improve the accuracy of the interpolation on a logarithmic scale for this type of data relationship? I understand that current values outside the range of (4.86 A, 135 A) may lead to inaccurate results due to extrapolation.
I’ve updated the code above to improve interpolation accuracy:
# Figure 3: current discrepancy of the interpolated results ('diff_current')
# next to the discrepancy already present in the source table
# (tabulated current minus rel_current), against three different X axes.
plt.figure(figsize=(18, 15))
for panel_number, (result_column, table_column, x_label, panel_title) in enumerate(
    (
        ('real_hr', 'hour_rates', 'Hour Rate (h)', 'Interpolation Data Relationship By Hour Rate'),
        ('real_cap', 'capacities', 'Capacity (A/h)', 'Interpolation Data Relationship By Capacity'),
        ('currents', 'currents', 'Current (A)', 'Interpolation Data Relationship By Current'),
    ),
    start=1,
):
    plt.subplot(3, 1, panel_number)
    plt.plot(capacities[result_column], capacities['diff_current'], label='Diff Current')
    plt.plot(
        capacity[table_column],
        capacity['currents'] - rel_current,
        label='Diff Original Current Relation',
    )
    plt.ylabel('Diff Interpolated / Calculated')
    plt.xlabel(x_label)
    plt.title(panel_title)
    plt.legend()
    # The tick range uses the interpolated column that shares its name with
    # the source-table column plotted on this panel.
    max_tick = capacities[table_column].max() + 10
    plot_grid(major_ticks=np.arange(0, max_tick, 20), minor_ticks=np.arange(0, max_tick, 5))
I’ve made additional updates to the code to further improve interpolation accuracy:
- Rounded all values to 3 decimal places to minimize insignificant errors.
- Observing the updated graphs, the `hour_rate` interpolation values are more accurate than the `capacity` interpolation values.
I’ve adjusted the code to interpolate only `hour_rate` and then calculate `capacity` using the relationship `capacity = hour_rate * current`.
Below are the updated graphs:
Looking at the relations you described for your data:
hour_rates (h) = capacities (Ah) / currents (A)
capacities (Ah) = hour_rates (h) * currents (A)
currents (A) = capacities (Ah) / hour_rates (h)
These relations are not satisfied exactly by the data you presented. I created a corrected data set in which `currents` is recomputed from `capacities / hour_rates`, so that the relations hold exactly:
# Take an explicit copy so the new column is written to an independent frame
# rather than to a column selection of `capacity`; assigning onto the bare
# selection triggers pandas' SettingWithCopyWarning.
capacity_data_corr = capacity[['hour_rates', 'capacities']].copy()
# Recompute the current so that currents = capacities / hour_rates holds
# exactly in the corrected table.
capacity_data_corr['currents'] = capacity_data_corr['capacities'] / capacity_data_corr['hour_rates']
With this corrected data the interpolation is almost ideal.
This means the interpolation itself can be good; the discrepancy arises because the original data does not satisfy the assumed relations exactly. If those relations are only approximate, then over such a wide range an error of this size is not as bad as it looks.