I am working with simulation results stored in a deeply nested defaultdict structure. This structure (data_to_save) mixes standard Python types (lists, ints, floats, None) with NumPy arrays.
I need to save this entire data_to_save structure into a JSON file. Because json.dump cannot handle NumPy arrays natively, and I'm using NumPy 2.0 (where older type aliases are removed), I've written a custom recursive function, make_json_serializable, to convert NumPy types and handle potential issues like np.nan:
import numpy as np
import json
from collections import defaultdict
# Custom converter for NumPy types and NaNs
def make_json_serializable(obj):
    if isinstance(obj, np.ndarray): return obj.tolist()
    # Important: Handle defaultdict specifically before dict
    if isinstance(obj, defaultdict): return {k: make_json_serializable(v) for k, v in obj.items()}
    if isinstance(obj, dict): return {k: make_json_serializable(v) for k, v in obj.items()}
    if isinstance(obj, list): return [make_json_serializable(i) for i in obj]
    # Check base Python types first (floats are handled below so NaN can be caught)
    if isinstance(obj, (int, bool, str, type(None))): return obj
    # Check abstract NumPy types (NumPy 2.0+); note np.float64 is also a plain float
    if isinstance(obj, np.integer): return int(obj)
    if isinstance(obj, (float, np.floating)): return None if np.isnan(obj) else float(obj)  # Convert NaN to None for JSON compatibility
    if isinstance(obj, (complex, np.complexfloating)): return {'real': float(obj.real), 'imag': float(obj.imag)}
    if isinstance(obj, np.bool_): return bool(obj)
    if isinstance(obj, np.void): return None
    # Fallback if type not recognized
    # print(f"Warning: Type {type(obj)} not explicitly handled. Passing as is.")  # Optional debug
    return obj
# --- END CONVERTER ---
# Example of the nested structure (simplified)
data_to_save = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict))))
data_to_save['category_A']['size_X']['config_1']['setting_alpha']['parameter_grid'] = np.linspace(0, 1, 5)
data_to_save['category_A']['size_X']['config_1']['setting_alpha']['metric_array_1'] = np.array([0.1, 0.2, 0.5, 0.8, 1.0])
data_to_save['category_A']['size_X']['config_1']['setting_alpha']['num_valid_entries'] = 5
data_to_save['category_A']['size_X']['config_1']['setting_alpha']['distribution_data'] = {
    '0.5': {'size_values': [1, 2, 3], 'normalized_counts': np.array([0.5, 0.3, 0.2]), 'source_count': 5}
}
# ... structure can be deeply nested with more arrays ...
My attempt to save this data results in a TypeError:
# Current (failing) saving code:
output_filename = "output_data.json"
try:
    # PROBLEM: This tries to dump the raw structure with NumPy arrays using the standard encoder
    temp_dict_structure = json.loads(json.dumps(data_to_save))  # <-- FAILS HERE with TypeError
    # The custom converter is intended to run here, but never gets called due to the previous line's error
    serializable_data = make_json_serializable(temp_dict_structure)
    with open(output_filename, "w") as f:
        json.dump(serializable_data, f, indent=2, allow_nan=False)  # NaNs should be None by converter
    print(f"Data saved to: {output_filename}")
except TypeError as e_json:
    print(f"ERROR during JSON processing: {e_json}")  # Output: ERROR during JSON processing: Object of type ndarray is not JSON serializable
except Exception as e_save:
    print(f"ERROR saving file: {e_save}")
The error message TypeError: Object of type ndarray is not JSON serializable points to the json.dumps(data_to_save) call. This confirms that the default JSON encoder encounters a NumPy array before my make_json_serializable function has a chance to convert it. The intermediate json.loads(json.dumps(...)) was likely an attempt to convert defaultdicts, but it triggers the TypeError first.
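The failure is easy to reproduce in isolation; a minimal snippet along these lines (simplified, not my actual saving code) triggers the same error:
import json
import numpy as np

# The standard encoder has no handler for ndarray, so this raises:
# TypeError: Object of type ndarray is not JSON serializable
json.dumps({'metric_array': np.array([0.1, 0.2])})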
Question:
How can I correctly apply my custom conversion function (make_json_serializable) to the entire nested defaultdict structure (data_to_save) before passing it to json.dump? The goal is to ensure all nested NumPy arrays and special numeric types are converted into basic Python lists, ints, floats, or None, resolving the TypeError.
json.dump has a cls argument that can be passed a custom json.JSONEncoder. Override its default method to tell it how to serialize objects that JSON doesn't understand:
import numpy as np
import json
from collections import defaultdict
class CustomEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
data_to_save = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict))))
data_to_save['category_A']['size_X']['config_1']['setting_alpha']['parameter_grid'] = np.linspace(0, 1, 5)
data_to_save['category_A']['size_X']['config_1']['setting_alpha']['metric_array_1'] = np.array([0.1, 0.2, 0.5, 0.8, 1.0])
data_to_save['category_A']['size_X']['config_1']['setting_alpha']['num_valid_entries'] = 5
data_to_save['category_A']['size_X']['config_1']['setting_alpha']['distribution_data'] = {
    '0.5': {'size_values': [1, 2, 3], 'normalized_counts': np.array([0.5, 0.3, 0.2]), 'source_count': 5}
}
with open('output.json', 'w', encoding='utf8') as file:
    json.dump(data_to_save, file, indent=2, cls=CustomEncoder)
output.json:
{
  "category_A": {
    "size_X": {
      "config_1": {
        "setting_alpha": {
          "parameter_grid": [
            0.0,
            0.25,
            0.5,
            0.75,
            1.0
          ],
          "metric_array_1": [
            0.1,
            0.2,
            0.5,
            0.8,
            1.0
          ],
          "num_valid_entries": 5,
          "distribution_data": {
            "0.5": {
              "size_values": [
                1,
                2,
                3
              ],
              "normalized_counts": [
                0.5,
                0.3,
                0.2
              ],
              "source_count": 5
            }
          }
        }
      }
    }
  }
}
Also note that json.dump has an allow_nan parameter that defaults to True. Emitting NaN and Infinity values is not strictly allowed by the JSON specification, but if the recipient of the JSON also accepts them, the default can be kept. Otherwise, set it to False and add the corresponding support in the JSONEncoder.
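Since defaultdict is a subclass of dict, json.dump already walks the nested structure like plain dicts, so the nesting itself needs no special treatment. If you want NaN values inside the arrays written as null (and NumPy scalar types such as np.int64 or np.bool_ covered, which the minimal CustomEncoder above does not handle), the encoder can be extended. Here is a rough sketch; the NumpyEncoder name and the _nan_to_none helper are illustrative choices, not part of the code above:
import json
import math
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            # tolist() yields plain Python scalars / nested lists;
            # replace NaN with None so allow_nan=False does not raise
            return self._nan_to_none(obj.tolist())
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return None if np.isnan(obj) else float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        return super().default(obj)

    @staticmethod
    def _nan_to_none(value):
        # Recurse through the nested lists produced by ndarray.tolist()
        if isinstance(value, list):
            return [NumpyEncoder._nan_to_none(v) for v in value]
        if isinstance(value, float) and math.isnan(value):
            return None
        return value

with open('output.json', 'w', encoding='utf8') as file:
    json.dump(data_to_save, file, indent=2, cls=NumpyEncoder, allow_nan=False)
One caveat: a NaN stored directly as a Python float (or as np.float64, which subclasses float) rather than inside an array is serialized by the encoder's built-in float handling, so default() is never called for it; with allow_nan=False such a value still raises a ValueError and has to be cleaned up beforehand, for example with the make_json_serializable function from the question.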