Tags: python, pandas, numpy, scipy, interpolation

Interpolating Battery Capacity Data in Logarithmic Scale with Python


I'm working on interpolating battery capacity data based on the relationships between hour_rates, capacities and currents. Here’s a sample of my data:

import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

# Discharge table for a Rolls S-480 flooded battery.
# Each row is [hour_rate, capacity, current], matching the column names below.
capacity_data = [
    [1, 135, 135],
    [2, 191, 95.63],
    [3, 221, 73.75],
    [4, 244, 60.94],
    [5, 263, 52.5],
    [6, 278, 46.25],
    [8, 300, 37.5],
    [10, 319, 31.88],
    [12, 334, 27.81],
    [15, 352, 23.45],
    [20, 375, 18.75],
    [24, 386, 16.09],
    [50, 438, 8.76],
    [72, 459, 6.38],
    [100, 486, 4.86],
]
capacity = pd.DataFrame(
    capacity_data,
    columns=['hour_rates', 'capacities_o', 'currents'],
)
# Capacity recomputed as current * hour rate (rounded to 3 decimals); the
# table's own capacity figures stay available in 'capacities_o'.
capacity['capacities'] = capacity['currents'].mul(capacity['hour_rates']).round(3)

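The columns relate as follows:

hour_rates (h) = capacities (Ah) / currents (A)
capacities (Ah) = hour_rates (h) * currents (A)
currents (A) = capacities (Ah) / hour_rates (h)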

Objective: I want to interpolate capacities and hour_rates for a range of currents values using logarithmic scaling for better accuracy.

Code

I wrote a custom interpolation class and a helper function to achieve this. Here’s the code:

from typing import Union

class interpolate1d(interp1d):
    """Extend scipy ``interp1d`` to interpolate/extrapolate per axis in log space.

    Each axis can independently be handled in ``'linear'`` or ``'log'``
    space. For a ``'log'`` axis the data are transformed with ``log10``
    before the underlying ``interp1d`` is built; on evaluation the query
    points are transformed the same way and the result is mapped back with
    ``10 ** value``.

    Parameters
    ----------
    x, y : array-like
        Sample points and values, forwarded to ``interp1d`` (after the
        optional ``log10`` transform).
    *args, **kwargs
        Forwarded unchanged to ``interp1d`` (e.g. ``kind``, ``fill_value``).
    xspace, yspace : {'linear', 'log'}
        Space in which the corresponding axis is interpolated.

    Raises
    ------
    ValueError
        If ``xspace``/``yspace`` is not ``'linear'`` or ``'log'``, or if a
        log-space axis contains real values that are not strictly positive.
    """

    _SPACES = ('linear', 'log')

    def __init__(self, x, y, *args, xspace='linear', yspace='linear', **kwargs):
        # Reject unknown names up front: a typo such as 'Log' would otherwise
        # silently fall back to plain linear interpolation.
        for arg_name, space in (('xspace', xspace), ('yspace', yspace)):
            if space not in self._SPACES:
                raise ValueError(
                    f"{arg_name} must be one of {self._SPACES}, got {space!r}"
                )
        self.xspace = xspace
        self.yspace = yspace
        if xspace == 'log':
            x = self._to_log10(x, 'x')
        if yspace == 'log':
            y = self._to_log10(y, 'y')
        super().__init__(x, y, *args, **kwargs)

    @staticmethod
    def _to_log10(values, axis_name):
        """Return ``log10(values)``, rejecting non-positive real data."""
        arr = np.asarray(values)
        # log10 of zero/negative reals gives -inf/NaN, which would poison
        # every interpolated value, so fail loudly instead.
        if np.isrealobj(arr) and np.any(arr <= 0):
            raise ValueError(
                f"{axis_name} values must be strictly positive for log-space interpolation"
            )
        return np.log10(arr)

    def __call__(self, x, *args, **kwargs):
        """Evaluate the interpolant at ``x`` (given in original, untransformed units)."""
        if self.xspace == 'log':
            x = np.log10(x)
        values = super().__call__(x, *args, **kwargs)
        # Undo the log10 transform that was applied to y at construction time.
        if self.yspace == 'log':
            return 10 ** values
        return values


def interpolate_cap_by_current(df: pd.DataFrame,
                               current_values: Union[list, np.ndarray],
                               kind: Union[str, int] = 'linear',
                               hr_limit: int = 600
                               ):
    """
    Interpolate Battery Capacity Values From Current list values

    Two log-log interpolants are built from ``df`` (hour rate vs. current and
    capacity vs. current, both with extrapolation enabled) and evaluated at
    ``current_values``. Cross-check columns derived from the interpolated
    values are added so the consistency of the two interpolants can be
    inspected.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table with ``'currents'``, ``'hour_rates'`` and
        ``'capacities'`` columns.
    current_values : array-like
        Currents at which to evaluate the interpolants. Scalars, lists and
        arrays are accepted.
    kind : str or int
        Interpolation kind forwarded to ``interpolate1d``.
    hr_limit : int
        Rows whose interpolated hour rate is not below this value are dropped.

    Returns
    -------
    pandas.DataFrame
        One row per retained current with the columns ``currents``,
        ``hour_rates``, ``capacities`` (interpolated) and the derived
        ``calc_cap``, ``real_cap``, ``diff_cap``, ``calc_hr``, ``real_hr``,
        ``diff_hr``, ``real_current``, ``diff_current`` (rounded to 3 decimals).
    """
    # The original guard tested an undefined name (`np_data`); coerce the
    # input instead so lists, scalars and arrays are all handled uniformly.
    current_values = np.atleast_1d(np.asarray(current_values))

    # Both interpolants share the same x data and options; only y differs.
    interp_options = dict(
        xspace='log',
        yspace='log',
        fill_value="extrapolate",
        kind=kind,
    )
    hour_rate_interp_func = interpolate1d(
        df['currents'], df['hour_rates'], **interp_options
    )
    capacity_interp_func = interpolate1d(
        df['currents'], df['capacities'], **interp_options
    )

    # Calculate interpolated values for new currents
    hour_rate_interpolated = hour_rate_interp_func(current_values)
    capacity_interpolated = capacity_interp_func(current_values)

    # Cross-checks: recompute each quantity from the other interpolated one
    # and record how far the two estimates disagree.
    calc_cap = np.around(current_values * hour_rate_interpolated, 3)
    calc_hr = np.around(capacity_interpolated / current_values, 3)
    diff_cap = np.around(capacity_interpolated - calc_cap, 3)
    diff_hr = np.around(hour_rate_interpolated - calc_hr, 3)
    # Interpolated hour rate with the disagreement removed, then the
    # capacity and current that follow from it.
    real_hr = np.around(hour_rate_interpolated - diff_hr, 3)
    real_cap = np.around(current_values * real_hr, 3)
    real_current = np.around(real_cap / real_hr, 3)

    result = pd.DataFrame({
        'currents': current_values,
        'hour_rates': hour_rate_interpolated,
        'capacities': capacity_interpolated,
        'calc_cap': calc_cap,
        'real_cap': real_cap,
        'diff_cap': diff_cap,
        'calc_hr': calc_hr,
        'real_hr': real_hr,
        'diff_hr': diff_hr,
        'real_current': real_current,
        'diff_current': np.around(current_values - real_current, 3),
    })

    # Keep only rows whose interpolated hour rate is below the limit.
    return result[result['hour_rates'] < hr_limit]

def plot_grid(major_ticks: list,
              minor_ticks: list,
              ):
    """Enable the grid on the current axes and set major/minor X tick positions.

    Minor grid lines are drawn fainter (alpha 0.2) than major ones (alpha 0.5).
    """
    axes = plt.gca()
    axes.grid(True)
    axes.set_xticks(major_ticks)
    axes.set_xticks(minor_ticks, minor=True)
    # Same order as before: style the minor grid first, then the major grid.
    for which, alpha in (('minor', 0.2), ('major', 0.5)):
        axes.grid(which=which, alpha=alpha)

Visualisation:

# Currents at which the interpolants are evaluated.
currents_list = np.array([
    0.1, 0.2, 0.4, 0.5, 0.6, 0.8, 1, 1.5, 1.7, 2, 2.2, 2.5,
    3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 17, 20, 22, 25, 27, 30, 32,
    35, 37, 40, 60, 80, 120, 150, 180, 220, 250
])
# Available `kind` values: linear, nearest, nearest-up, zero, slinear,
# quadratic, cubic, previous, or next.
capacities = interpolate_cap_by_current(
    df=capacity,
    current_values=currents_list,
    kind='quadratic'
)
# Current implied by the source table itself (capacity / hour rate).
rel_current = np.around(capacity['capacities']/capacity['hour_rates'], 3)

plt.figure(figsize=(18, 15))

# Panel 1: capacity against hour rate.
plt.subplot(3, 1, 1)
plt.plot(capacities['real_hr'], capacities['capacities'], label='Interpolated Capacity')
plt.plot(capacities['real_hr'], capacities['calc_cap'], label='Calculated Capacity')
plt.plot(capacities['real_hr'], capacities['real_cap'], label='Real Capacity')
plt.plot(capacity['hour_rates'], capacity['capacities'], label='Capacity')
# Capacity is measured in ampere-hours (Ah), not amperes per hour.
plt.ylabel('Capacity (Ah)')
plt.xlabel('Hour Rate (h)')
plt.title('Battery Hour Rate / Capacity relationship')
plt.legend()
max_tick = capacities['hour_rates'].max() + 10
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

# Panel 2: current against hour rate (reuses the hour-rate tick range).
plt.subplot(3, 1, 2)
plt.plot(capacities['real_hr'], capacities['currents'], label='Interpolated Current (A)')
plt.plot(capacities['real_hr'], capacities['real_current'], label='Real Current (A)')
plt.plot(capacity['hour_rates'], rel_current, label='Calculated Original Current Relation (A)')
plt.plot(capacity['hour_rates'], capacity['currents'], label='Current (A)')
plt.ylabel('Current (A)')
plt.xlabel('Hour Rate (h)')
plt.title('Battery Hour Rate / Current relationship')
plt.legend()
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

# Panel 3: capacity against current.
plt.subplot(3, 1, 3)
plt.plot(capacities['currents'], capacities['capacities'], label='Interpolated capacity / current')
plt.plot(capacities['currents'], capacities['calc_cap'], label='Calculated capacity / current')
plt.plot(capacity['currents'], capacity['capacities'], label='capacity / current')
plt.ylabel('Capacity (Ah)')
plt.xlabel('Current (A)')
plt.title('Battery Current / Capacity relationship')
plt.xscale('linear')
plt.yscale('linear')
plt.legend()
max_tick = capacities['currents'].max() + 10
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

enter image description here

Problem

Even though I've configured the interpolation in logarithmic space, the interpolated values still don’t match the calculated values when verified against the relationships provided. I’ve illustrated this discrepancy in the plots below, where I calculate the difference by applying the original relationships to the interpolated results.

# Differences between the interpolated values and the values recomputed from
# the relations, plotted against hour rate, capacity and current in turn.
plt.figure(figsize=(18, 15))

# Panel 1: differences by hour rate.
plt.subplot(3, 1, 1)
plt.plot(capacities['hour_rates'], capacities['diff_cap'], label='Diff Capacity')
plt.plot(capacities['hour_rates'], capacities['diff_hr'], label='Diff Hour Rate')
plt.ylabel('Diff Interpolated / Calculated')
plt.xlabel('Hour Rate (h)')
plt.title('Interpolation Data Relationship By Hour Rate')
plt.legend()
max_tick = capacities['hour_rates'].max() + 10
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

# Panel 2: differences by capacity.
plt.subplot(3, 1, 2)
plt.plot(capacities['capacities'], capacities['diff_cap'], label='Diff Capacity')
plt.plot(capacities['capacities'], capacities['diff_hr'], label='Diff Hour Rate')
plt.ylabel('Diff Interpolated / Calculated')
# Capacity is measured in ampere-hours (Ah), not amperes per hour.
plt.xlabel('Capacity (Ah)')
plt.title('Interpolation Data Relationship By Capacity')
plt.legend()
max_tick = capacities['capacities'].max() + 10
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

# Panel 3: differences by current.
plt.subplot(3, 1, 3)
plt.plot(capacities['currents'], capacities['diff_cap'], label='Diff Capacity')
plt.plot(capacities['currents'], capacities['diff_hr'], label='Diff Hour Rate')
plt.ylabel('Diff Interpolated / Calculated')
plt.xlabel('Current (A)')
plt.title('Interpolation Data Relationship By Current')
plt.legend()
max_tick = capacities['currents'].max() + 10
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

enter image description here

Is there a way to improve the accuracy of the interpolation on a logarithmic scale for this type of data relationship? I understand that current values outside the range of (4.86 A, 135 A) may lead to inaccurate results due to extrapolation.

Edit

I’ve updated the code above to improve interpolation accuracy:

# Difference between requested and recomputed current, alongside the same
# difference for the source table (table current minus capacity / hour rate).
plt.figure(figsize=(18, 15))

# Panel 1: current difference by hour rate.
plt.subplot(3, 1, 1)
plt.plot(capacities['real_hr'], capacities['diff_current'], label='Diff Current')
plt.plot(capacity['hour_rates'], capacity['currents'] - rel_current, label='Diff Original Current Relation')
plt.ylabel('Diff Interpolated / Calculated')
plt.xlabel('Hour Rate (h)')
plt.title('Interpolation Data Relationship By Hour Rate')
plt.legend()
max_tick = capacities['hour_rates'].max() + 10
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

# Panel 2: current difference by capacity.
plt.subplot(3, 1, 2)
plt.plot(capacities['real_cap'], capacities['diff_current'], label='Diff Current')
plt.plot(capacity['capacities'], capacity['currents'] - rel_current, label='Diff Original Current Relation')
plt.ylabel('Diff Interpolated / Calculated')
# Capacity is measured in ampere-hours (Ah), not amperes per hour.
plt.xlabel('Capacity (Ah)')
plt.title('Interpolation Data Relationship By Capacity')
plt.legend()
max_tick = capacities['capacities'].max() + 10
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

# Panel 3: current difference by current.
plt.subplot(3, 1, 3)
plt.plot(capacities['currents'], capacities['diff_current'], label='Diff Current')
plt.plot(capacity['currents'], capacity['currents'] - rel_current, label='Diff Original Current Relation')
plt.ylabel('Diff Interpolated / Calculated')
plt.xlabel('Current (A)')
plt.title('Interpolation Data Relationship By Current')
plt.legend()
max_tick = capacities['currents'].max() + 10
plot_grid(
    major_ticks=np.arange(0, max_tick, 20),
    minor_ticks=np.arange(0, max_tick, 5)
)

enter image description here

Edit 2

I’ve made additional updates to the code to further improve interpolation accuracy:

- Rounded all values to 3 decimal places to minimize insignificant errors.
- Observing the updated graphs,
`hour_rate` interpolation values are more accurate than `capacity` interpolation values.
I’ve adjusted the code to interpolate only `hour_rate` and then calculate `capacity` using the relationship `capacity = hour_rate * current`.

Below are the updated graphs:

Data Visualization

enter image description here

Difference Between Interpolated and Calculated Capacity and Hour Rate

enter image description here

Difference Between Interpolated and Calculated Current

enter image description here


Solution

  • Looking at the relations you described for your current data:

    hour_rates (h) = capacities (Ah) / currents (A)
    capacities (Ah) = hour_rates (h) * currents (A)
    currents (A) = capacities (Ah) / hour_rates (h)
    

    These relations are not met exactly by the data you presented. I've created a corrected data set in which they hold by construction:

    # Take an explicit copy so the new column is added to an independent
    # frame rather than to a selection of `capacity` (avoids pandas'
    # SettingWithCopyWarning).
    capacity_data_corr = capacity[['hour_rates', 'capacities']].copy()
    # Recompute the current from capacity / hour rate so the relation holds
    # by construction.
    capacity_data_corr['currents'] = capacity_data_corr['capacities'] / capacity_data_corr['hour_rates']
    

    With this corrected data, the interpolation is almost ideal:

    enter image description here enter image description here

    This means that the interpolation itself can be good, but the data does not satisfy the assumed relations. If these relations are only approximate, then over such a wide range an error like this is not as bad as it looks.