I am encountering a strange issue while building a speech-to-text model.
The model trains and is created without problems, but when I try to save it as an HDF5 file so I can reuse it later for testing, the save simply does not happen, and no errors pop up in the console…
Here is the full code:
import os
import librosa
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile
import warnings
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Dropout, Flatten, Conv1D, Input, MaxPooling1D
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras.models import load_model
import random
warnings.filterwarnings("ignore")
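# Load one sample clip at 16 kHz, listen to it, then resample it to 8 kHz and listen again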
samples, sample_rate = librosa.load(r'C:\Users\makra\OneDrive\Desktop\Conda\Speech-to-text\train\audio\yes\0a7c2a8d_nohash_0.wav', sr = 16000)
ipd.Audio(samples, rate=sample_rate)
samples = librosa.resample(samples, sample_rate, 8000)
ipd.Audio(samples, rate=8000)
labels=os.listdir(r'C:\Users\makra\OneDrive\Desktop\Conda\Speech-to-text\train\audio')
labels=["yes", "no", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "zero"]
all_wave = []
all_label = []
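# Read every .wav for each label, resample to 8 kHz, and keep only clips that are exactly 8000 samples (1 second) long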
for label in labels:
    print(label)
    waves = [f for f in os.listdir(r'C:\Users\makra\OneDrive\Desktop\Conda\Speech-to-text\train\audio' + '/' + label) if f.endswith('.wav')]
    for wav in waves:
        samples, sample_rate = librosa.load(r'C:\Users\makra\OneDrive\Desktop\Conda\Speech-to-text\train\audio' + '/' + label + '/' + wav, sr=16000)
        samples = librosa.resample(samples, sample_rate, 8000)
        if len(samples) == 8000:
            all_wave.append(samples)
            all_label.append(label)
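# Encode the string labels as integers, then one-hot encode them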
le = LabelEncoder()
y=le.fit_transform(all_label)
classes= list(le.classes_)
y=np_utils.to_categorical(y, num_classes=len(labels))
all_wave = np.array(all_wave).reshape(-1,8000,1)
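# Stratified 80/20 split into training and validation sets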
x_tr, x_val, y_tr, y_val = train_test_split(np.array(all_wave),np.array(y),stratify=y,test_size = 0.2,random_state=777,shuffle=True)
K.clear_session()
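# Build a 1D CNN that takes the raw 8000-sample waveform as input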
inputs = Input(shape=(8000,1))
#First Conv1D layer
conv = Conv1D(8,13, padding='valid', activation='relu', strides=1)(inputs)
conv = MaxPooling1D(3)(conv)
conv = Dropout(0.3)(conv)
#Second Conv1D layer
conv = Conv1D(16, 11, padding='valid', activation='relu', strides=1)(conv)
conv = MaxPooling1D(3)(conv)
conv = Dropout(0.3)(conv)
#Third Conv1D layer
conv = Conv1D(32, 9, padding='valid', activation='relu', strides=1)(conv)
conv = MaxPooling1D(3)(conv)
conv = Dropout(0.3)(conv)
#Fourth Conv1D layer
conv = Conv1D(64, 7, padding='valid', activation='relu', strides=1)(conv)
conv = MaxPooling1D(3)(conv)
conv = Dropout(0.3)(conv)
#Flatten layer
conv = Flatten()(conv)
#Dense Layer 1
conv = Dense(256, activation='relu')(conv)
conv = Dropout(0.3)(conv)
#Dense Layer 2
conv = Dense(128, activation='relu')(conv)
conv = Dropout(0.3)(conv)
outputs = Dense(len(labels), activation='softmax')(conv)
model = Model(inputs, outputs)
model.summary()
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
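# Stop early when val_loss stops improving and checkpoint the best model by validation accuracy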
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, min_delta=0.0001)
mc = ModelCheckpoint('best_model.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')
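# Train for up to 100 epochs with the callbacks above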
history=model.fit(x_tr, y_tr ,epochs=100, callbacks=[es,mc], batch_size=32, validation_data=(x_val,y_val))
Since no error is shown in the kernel, I have no error message to report.
I can recommend setting save_weights_only=True in your ModelCheckpoint callback and then restoring the weights with model.load_weights(checkpoint_path). This will load the latest model weights, as shown here. This is just a temporary workaround, though: I think your model's validation accuracy is not increasing, and that is why your save_best_only option never saves a model, since your ModelCheckpoint callback is configured to save only when the monitored accuracy improves.
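A minimal sketch of what that could look like, assuming the rest of your script stays the same (the filename below is just an example, not from your post):

from keras.callbacks import ModelCheckpoint

checkpoint_path = 'model_weights.hdf5'  # example filename

# Save only the weights, every epoch, so the latest weights are always on disk
# even if the monitored metric never improves.
mc = ModelCheckpoint(checkpoint_path,
                     monitor='val_acc',
                     verbose=1,
                     save_weights_only=True,
                     save_best_only=False,
                     mode='max')

history = model.fit(x_tr, y_tr, epochs=100, callbacks=[es, mc],
                    batch_size=32, validation_data=(x_val, y_val))

# Later, rebuild the same architecture, then restore the saved weights.
model.load_weights(checkpoint_path)

Once loading the weights works, you can go back and investigate why the monitored accuracy is not improving, which is the condition save_best_only is waiting for.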