[SOLVED] Apply gain to specific frequencies using pyDub

Apply gain to specific frequencies using pyDub

I want to increase the volume of a specific frequency in a wav file, making it louder (more audible) than the rest of the frequencies.

What I've done so far (or at least I believe I have) is find the specific frequency I want to boost. I am stuck on the part where I have to increase its gain: it adds an annoying buzzing sound to my audio.

Here is the code:

import array
from pydub import AudioSegment
from pydub.generators import Sine
import numpy as np
from scipy.fft import fft
import sezone.fmanagement as file_management
import math

def find_frequencies_in_segment(segment: AudioSegment) -> (array, tuple):
    """Compute the FFT of a segment's samples and the frequency of each bin.

    Args:
        segment: Audio whose samples (``get_array_of_samples()``) are
            transformed; ``frame_rate`` sets the frequency scale.

    Returns:
        A ``(freqs, fft_result)`` pair: the frequency of every FFT bin, in
        the same unit as ``frame_rate``, and the complex FFT output. Both
        have one entry per input sample.
    """
    samples = np.array(segment.get_array_of_samples())
    spectrum = fft(samples)

    # Sample spacing is the inverse of the frame rate, so bins come out in Hz
    # when frame_rate is given in Hz.
    sample_spacing = 1/segment.frame_rate
    bin_frequencies = np.fft.fftfreq(len(spectrum), d=sample_spacing)

    return (bin_frequencies, spectrum)

def create_audio_sample() -> AudioSegment:
    """Build the test signal: a 60 Hz tone followed by a 100 Hz tone.

    Returns:
        The two sine tones concatenated (low tone first), with the overall
        level lowered by 20 so that a later volume boost is easy to hear.
    """
    # Tone frequencies, in Hz.
    low_hz = 60
    medium_hz = 100

    # Durations are passed straight to pydub (milliseconds per its API).
    low_tone: AudioSegment = Sine(low_hz).to_audio_segment(duration=500)
    medium_tone: AudioSegment = Sine(medium_hz).to_audio_segment(duration=125)

    # "+" joins the tones in sequence; "- 20" makes the result quieter so any
    # difference in volume after processing is easier to notice.
    return (low_tone + medium_tone) - 20

def calc_scale_factor(original_sample: AudioSegment, fft_result: array) -> float:
    """Return the ratio between the largest sample value and the largest FFT value.

    Args:
        original_sample: Audio whose samples provide the numerator.
        fft_result: Values providing the denominator (the caller in this
            file passes spectral magnitudes).

    Returns:
        ``max(samples) / max(fft_result)``. Note that both maxima are the
        largest signed values, not the largest absolute values.
    """
    peak_sample = np.max(np.array(original_sample.get_array_of_samples()))
    peak_fft = np.max(np.array(fft_result))

    return peak_sample / peak_fft

def find_indexes_with_bandwidth(samples: array, amplitude: int, bandwith: int) -> list:
    """Return the positions of the samples lying within ``bandwith`` of ``amplitude``.

    Args:
        samples: Sequence of numeric sample values.
        amplitude: Target value to look for.
        bandwith: Inclusive tolerance; a sample ``s`` matches when
            ``s - bandwith <= amplitude <= s + bandwith``.

    Returns:
        A plain ``list`` of the matching indexes in ascending order (empty
        when nothing matches).
    """
    return [
        i
        for i, sample in enumerate(samples)
        if sample - bandwith <= amplitude <= sample + bandwith
    ]

def original_sample_index_for_frequency(audio: AudioSegment, frequency: int) -> array:
    """Pick the samples of ``audio`` whose value matches the rescaled FFT magnitude of ``frequency``.

    Steps: take the FFT of the audio, read the spectral magnitude of the
    first bin whose frequency rounds down to ``frequency``, rescale that
    magnitude with ``calc_scale_factor``, then collect every sample whose
    value lies within +/-2 of the rescaled magnitude.

    Args:
        audio: Audio to analyse.
        frequency: Frequency to look for, compared against the floor of
            each FFT bin frequency.

    Returns:
        Indexes into ``audio.get_array_of_samples()`` of the matching samples.

    Raises:
        IndexError: If no FFT bin frequency rounds down to ``frequency``.
    """
    freqs, spectrum = find_frequencies_in_segment(audio)

    # Spectral magnitude of every bin.
    magnitudes = np.abs(spectrum)

    # Bins whose frequency, rounded down, equals the requested frequency.
    matching_bins = [k for k, f in enumerate(freqs) if math.floor(f) == frequency]

    # Only the first matching bin is used; its magnitude is rescaled so it can
    # be compared against raw sample values.
    target_amplitude = magnitudes[matching_bins[0]] * calc_scale_factor(audio, magnitudes)

    # Selection is by sample *value*: anything within +/-2 of the target.
    return find_indexes_with_bandwidth(audio.get_array_of_samples(), target_amplitude, 2)

def apply_gain_pydub(audio: AudioSegment, indexes: array) -> AudioSegment:
    """Boost the samples at ``indexes`` by 20 dB with pydub and export the result.

    The audio is rebuilt piece by piece: every run of untouched samples is
    copied as-is and every selected sample goes through ``apply_gain(20)``.
    The result is written to ``../output/new_audio_pydub.wav``.

    Args:
        audio: Source audio; it is not modified.
        indexes: Sample positions to boost. Assumed to be in ascending
            order without duplicates, as ``find_indexes_with_bandwidth``
            produces them.

    Returns:
        The rebuilt audio. Under the assumption above it has exactly as many
        samples as ``audio``.
    """
    # Zero-length slice used as the starting point for concatenation.
    rebuilt = audio.get_sample_slice(0, 0)
    next_unprocessed = 0

    for i in indexes:
        # get_sample_slice treats its end index as exclusive, so the untouched
        # run is [next_unprocessed, i). Passing i - 1 here would silently drop
        # the sample just before every boosted one.
        rebuilt += audio.get_sample_slice(next_unprocessed, i)
        # The single sample to boost is [i, i + 1).
        rebuilt += audio.get_sample_slice(i, i + 1).apply_gain(20)
        next_unprocessed = i + 1

    # Remaining tail; omitting the end index slices through the last sample.
    rebuilt += audio.get_sample_slice(next_unprocessed)

    rebuilt.export("../output/new_audio_pydub.wav", format="wav")

    return rebuilt

def apply_gain_manually(audio: AudioSegment, indexes: array) -> AudioSegment:
    """Boost the samples at ``indexes`` by 20 dB by scaling raw sample values.

    The result is written to ``../output/new_audio_manually.wav``.

    Args:
        audio: Source audio; its sample array is copied out via
            ``get_array_of_samples()`` and the copy is edited.
        indexes: Positions in the sample array to boost.

    Returns:
        A new ``AudioSegment`` built from the edited samples, using the
        frame rate, sample width and channel count of ``audio``.
    """
    samples = audio.get_array_of_samples()

    # 20 dB expressed as a linear amplitude factor; constant, so computed once.
    gain = 10 ** (20 / 20.0)

    # Range of a signed integer of the array's item size. Values are clamped
    # to it because assigning an out-of-range value to the array raises
    # OverflowError.
    limit = 2 ** (8 * samples.itemsize - 1)

    for i in indexes:
        boosted = int(samples[i] * gain)
        samples[i] = max(-limit, min(limit - 1, boosted))

    # Take the format from the source audio instead of assuming 16-bit mono.
    reconstructed_audio = AudioSegment(samples.tobytes(),
                                       frame_rate=audio.frame_rate,
                                       sample_width=audio.sample_width,
                                       channels=audio.channels)

    reconstructed_audio.export("../output/new_audio_manually.wav", format="wav")

    return reconstructed_audio

def main():
    """Generate the test audio, boost the 60 Hz matches two ways, and print peak values.

    Writes ``../output/audio_sample.wav`` here; the two gain functions write
    their own output files.
    """
    audio = create_audio_sample()
    audio.export("../output/audio_sample.wav", format="wav")

    # Sample indexes selected for the target frequency (60 Hz).
    boosted_indexes = original_sample_index_for_frequency(audio, 60)

    # Same boost done two different ways, to compare the results.
    audio_pydub = apply_gain_pydub(audio, boosted_indexes)
    audio_manually = apply_gain_manually(audio, boosted_indexes)

    # Peak values, to check that nothing has been clipped (16-bit limit first).
    print("maximum possible value: ", (2 ** 15) - 1)
    for label, segment in (
        ("audio max: ", audio),
        ("audio_pydub max: ", audio_pydub),
        ("audio_manually max: ", audio_manually),
    ):
        print(label, np.array(segment.get_array_of_samples()).max())

    # Both audios have a buzzing sound after the processing.

# Run the demo only when this file is executed as a script. The two prints
# pad the console output with blank lines before and after the run.
if __name__ == '__main__':
    print("\n")
    main()
    print("\n")

I want to understand:

What am I doing wrong? Why is a buzzing effect being added to my audio?
Is it possible to achieve what I want following the above logic?
What is the correct way to boost specific frequencies?

As I am still learning the basics about audio processing, any background or context information would be helpful.

Solution

The code that you have does its filtering in the frequency (FFT) domain. This can easily lead to artifacts, as you have experienced. Instead, you should try to do the filtering in the original time (waveform) domain. This avoids domain conversions and is much less vulnerable to strange artifacts.

The general filter that boosts a specific frequency range is called a bandpass filter. This can be designed with https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.iirdesign.html

For very narrow bandpass filters one can use a peak filter / resonant bandpass filter. These can be designed with https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.iirpeak.html