Regarding noise removal by "Spectral noise removal"

Asked 2 years ago, Updated 2 years ago, 17 views

Play back audio with "Spectral Noise Removal"

When I run the program from the question above, the following message is displayed. The audio plays, but the noise is not removed.
Please tell me how to fix this.

error message

FutureWarning: Pass hop_length=512, win_length=2048 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  return librosa.istft(y,hop_length,win_length)

source code

import numpy as np
from scipy.ndimage import maximum_filter1d


def envelope(y, rate, threshold):
    """Compute a peak envelope of a signal and flag where it exceeds a threshold.

    Args:
        - y — Signal data
        - rate — Sampling frequency; the envelope window is rate // 20 samples
        - threshold — Threshold for noise determination
    Returns:
        - mask — List of booleans, True where the envelope is greater than
          the threshold.
        - y_mean — Sound envelope (running maximum of the absolute amplitude)
    """
    # maximum_filter1d rejects a zero-length window, so use at least one sample
    window = max(1, rate // 20)
    y_mean = maximum_filter1d(np.abs(y), mode="constant", size=window)
    mask = list(y_mean > threshold)
    return mask, y_mean

import librosa


n_ft = 2048  # FFT size passed through _stft to librosa.stft
hop_length = 512  # Number of audio samples between adjacent STFT columns
win_length = 2048  # Window size
# Number of standard deviations above the per-frequency mean noise level (dB)
# that a bin must exceed to be treated as signal
n_std_thresh = 1.5

def _stft(y, n_ft, hop_length, win_length):
    """Return the short-time Fourier transform of ``y`` via ``librosa.stft``.

    ``n_ft`` is forwarded as librosa's ``n_fft`` argument; ``hop_length`` and
    ``win_length`` are forwarded unchanged.
    """
    return librosa.stft(y=y, n_fft=n_ft, hop_length=hop_length, win_length=win_length)

def _amp_to_db(x):
    """Convert amplitudes to decibels with ``librosa.core.amplitude_to_db``.

    Called with ref=1.0, amin=1e-20 and top_db=80.0.
    """
    return librosa.core.amplitude_to_db(x, ref=1.0, amin=1e-20, top_db=80.0)



sample_rate = 32000

# Loading audio files
# noise_clip = open(r "C:\Users\1818067\birdvoice.wav")

path = r'C:\Users\Owner\Desktop\elect\birdvoice.wav'
sig, _ = librosa.load(path, sr=sample_rate)

# Noise data acquisition: the second value returned by envelope() is used
# as the noise clip
mask, noise_clip = envelope(sig, sample_rate, threshold=0.03)

# STFT of the noise clip, then magnitude in dB
noise_stft = _stft(noise_clip, n_ft, hop_length, win_length)
noise_stft_db = _amp_to_db(np.abs(noise_stft))  # Convert to dB

# Statistics along axis 1, combined into a threshold:
# mean + n_std_thresh * std
mean_freq_noise = np.mean(noise_stft_db, axis=1)
std_freq_noise = np.std(noise_stft_db, axis=1)
noise_thresh = mean_freq_noise + std_freq_noise * n_std_thresh


import scipy


n_grad_freq = 2  # Number of frequency channels to smooth over with the mask
n_grad_time = 4  # Number of time channels to smooth over with the mask
prop_decrease = 1.0  # How much to reduce the noise (scale factor applied to the mask)


# data=open(r'C:\Users\1818067\birdvoice.wav')
# audio_clip=envelope(data).envelop

sample_rate = 32000

# Loading audio files
# noise_clip = open(r "C:\Users\1818067\birdvoice.wav")

path = r'C:\Users\Owner\Desktop\elect\birdvoice.wav'
sig, _ = librosa.load(path, sr=sample_rate)

# Load the clip to be denoised (no sr argument is given here, and the path is
# relative to the current working directory)
audio_clip, rate = librosa.load('birdvoice.wav')


# The clip to be denoised is also transformed with the STFT.
sig_stft = _stft(audio_clip, n_ft, hop_length, win_length)
sig_stft_db = _amp_to_db(np.abs(sig_stft))

# Create the mask smoothing filter over frequency and time: the outer product
# of two triangular ramps (rise 0 -> 1, fall 1 -> 0, with the zero end points
# trimmed off by [1:-1]).
smoothing_filter = np.outer(
    np.concatenate(
        [
            np.linspace(0, 1, n_grad_freq + 1, endpoint=False),
            np.linspace(1, 0, n_grad_freq + 2),
        ]
    )[1:-1],
    np.concatenate(
        [
            np.linspace(0, 1, n_grad_time + 1, endpoint=False),
            np.linspace(1, 0, n_grad_time + 2),
        ]
    )[1:-1],
)
# Normalize so that the filter coefficients sum to 1
smoothing_filter = smoothing_filter / np.sum(smoothing_filter)

import scipy.signal  # make sure the signal submodule is loaded before use

# Calculate the threshold for every time/frequency bin: the per-frequency
# threshold is repeated once per STFT column and transposed so that it can be
# compared elementwise with sig_stft_db.
db_thresh = np.repeat(
    np.reshape(noise_thresh, [1, len(mean_freq_noise)]),
    np.shape(sig_stft_db)[1],
    axis=0,
).T
# True where the clip is below the noise threshold
sig_mask = sig_stft_db < db_thresh
# Smooth the mask with the smoothing filter, then scale it
sig_mask = scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")
sig_mask = sig_mask * prop_decrease

# Lowest dB value in the clip (sig_stft_db is _amp_to_db(np.abs(sig_stft)))
mask_gain_dB = np.min(sig_stft_db)

def _db_to_amp(x):
    """Convert decibel values back to amplitudes (ref=1.0) via librosa."""
    return librosa.core.db_to_amplitude(x, ref=1.0)

# Blend per bin: keep the clip's dB value where the mask is 0 and move toward
# mask_gain_dB where the mask is 1.
sig_stft_db_masked = (
    sig_stft_db * (1 - sig_mask)
    + np.ones(np.shape(mask_gain_dB)) * mask_gain_dB * sig_mask
)

def _istft(y, hop_length, win_length):
    """Invert the STFT matrix ``y`` back to a time-domain signal via librosa.

    ``hop_length`` and ``win_length`` are passed as keyword arguments, which is
    what the FutureWarning raised by positional use asks for.
    """
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)

# Scale the imaginary part by (1 - mask)
sig_imag_masked = np.imag(sig_stft) * (1 - sig_mask)
# Rebuild a complex spectrogram from the masked dB magnitudes
sig_stft_amp = (_db_to_amp(sig_stft_db_masked) * np.sign(sig_stft)) + (1j * sig_imag_masked)

# Back to the time domain
recovered_signal = _istft(sig_stft_amp, hop_length, win_length)

import soundfile as sf

# NOTE(review): the sample rate 22050 is hard-coded; confirm it matches the
# rate at which audio_clip was loaded.
sf.write('clearvoice.wav', recovered_signal, 22050, subtype='PCM_16')

from playsound import playsound

playsound('clearvoice.wav')

python

2022-09-30 17:33

1 Answer

It looks like the code still contained a lot of redundancy.
With the following version you no longer need to read or convert the file multiple times.
The changed parts are marked with comments that begin with #######.

#########################################################
import numpy as np
import librosa
import scipy

######## Just load the original audio data file as-is; no conversion is required
audio_clip, _ = librosa.load(r'C:\Users\Owner\Desktop\elect\birdvoice.wav')

n_ft = 2048  # FFT size passed through _stft to librosa.stft
hop_length = 512  # Number of audio samples between adjacent STFT columns
win_length = 2048  # Window size
# Number of standard deviations above the per-frequency mean noise level (dB)
# that a bin must exceed to be treated as signal
n_std_thresh = 1.5

def _stft(y, n_ft, hop_length, win_length):
    """Return the short-time Fourier transform of ``y`` via ``librosa.stft``.

    ``n_ft`` is forwarded as librosa's ``n_fft`` argument; ``hop_length`` and
    ``win_length`` are forwarded unchanged.
    """
    return librosa.stft(y=y, n_fft=n_ft, hop_length=hop_length, win_length=win_length)

def _amp_to_db(x):
    """Convert amplitudes to decibels with ``librosa.core.amplitude_to_db``.

    Called with ref=1.0, amin=1e-20 and top_db=80.0.
    """
    return librosa.core.amplitude_to_db(x, ref=1.0, amin=1e-20, top_db=80.0)

######## No audio file conversion is required, so that step is deleted; the file is read once at the top

######## Change the noise_clip in the line below to audio_clip
noise_stft = _stft(audio_clip, n_ft, hop_length, win_length)
noise_stft_db = _amp_to_db(np.abs(noise_stft))  # Convert to dB

# Statistics along axis 1, combined into a threshold:
# mean + n_std_thresh * std
mean_freq_noise = np.mean(noise_stft_db, axis=1)
std_freq_noise = np.std(noise_stft_db, axis=1)
noise_thresh = mean_freq_noise + std_freq_noise * n_std_thresh

n_grad_freq = 2  # Number of frequency channels to smooth over with the mask
n_grad_time = 4  # Number of time channels to smooth over with the mask
prop_decrease = 1.0  # How much to reduce the noise (scale factor applied to the mask)

######## No need to read the audio file or the noise data again

# The clip to be denoised is also transformed with the STFT.
sig_stft = _stft(audio_clip, n_ft, hop_length, win_length)
sig_stft_db = _amp_to_db(np.abs(sig_stft))

# Create the mask smoothing filter over frequency and time: the outer product
# of two triangular ramps (rise 0 -> 1, fall 1 -> 0, with the zero end points
# trimmed off by [1:-1]).
smoothing_filter = np.outer(
    np.concatenate(
        [
            np.linspace(0, 1, n_grad_freq + 1, endpoint=False),
            np.linspace(1, 0, n_grad_freq + 2),
        ]
    )[1:-1],
    np.concatenate(
        [
            np.linspace(0, 1, n_grad_time + 1, endpoint=False),
            np.linspace(1, 0, n_grad_time + 2),
        ]
    )[1:-1],
)
# Normalize so that the filter coefficients sum to 1
smoothing_filter = smoothing_filter / np.sum(smoothing_filter)

import scipy.signal  # make sure the signal submodule is loaded before use

# Calculate the threshold for every time/frequency bin: the per-frequency
# threshold is repeated once per STFT column and transposed so that it can be
# compared elementwise with sig_stft_db.
db_thresh = np.repeat(
    np.reshape(noise_thresh, [1, len(mean_freq_noise)]),
    np.shape(sig_stft_db)[1],
    axis=0,
).T
# True where the clip is below the noise threshold
sig_mask = sig_stft_db < db_thresh
# Smooth the mask with the smoothing filter, then scale it
sig_mask = scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")
sig_mask = sig_mask * prop_decrease

# Lowest dB value in the clip (sig_stft_db is _amp_to_db(np.abs(sig_stft)))
mask_gain_dB = np.min(sig_stft_db)

def _db_to_amp(x):
    """Convert decibel values back to amplitudes (ref=1.0) via librosa."""
    return librosa.core.db_to_amplitude(x, ref=1.0)

# Blend per bin: keep the clip's dB value where the mask is 0 and move toward
# mask_gain_dB where the mask is 1.
sig_stft_db_masked = (
    sig_stft_db * (1 - sig_mask)
    + np.ones(np.shape(mask_gain_dB)) * mask_gain_dB * sig_mask
)

def _istft(y, hop_length, win_length):
    """Invert the STFT matrix ``y`` back to a time-domain signal via librosa.

    ``hop_length`` and ``win_length`` are passed as keyword arguments, which is
    what the FutureWarning raised by positional use asks for.
    """
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)
############################################################################

# Scale the imaginary part by (1 - mask)
sig_imag_masked = np.imag(sig_stft) * (1 - sig_mask)
# Rebuild a complex spectrogram from the masked dB magnitudes
sig_stft_amp = (_db_to_amp(sig_stft_db_masked) * np.sign(sig_stft)) + (1j * sig_imag_masked)

# Back to the time domain
recovered_signal = _istft(sig_stft_amp, hop_length, win_length)

import soundfile as sf

# NOTE(review): the sample rate 22050 is hard-coded; confirm it matches the
# rate at which audio_clip was loaded.
sf.write('clearvoice.wav', recovered_signal, 22050, subtype='PCM_16')

from playsound import playsound

playsound('clearvoice.wav')

Also, the message that begins with FutureWarning: is a warning, not an error.
It may have some effect, but it does not appear to cause a problem so far.

add

According to @metropolis's comment, the warning message no longer appears if you pass the arguments as keywords, as follows.

def _istft(y, hop_length, win_length):
    """Invert the STFT matrix ``y`` via librosa, passing the lengths by keyword."""
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)


2022-09-30 17:33

If you have any answers or tips


© 2024 OneMinuteCode. All rights reserved.