Vishal Bakshi
08/12/2020
I want to use the fastai
library to teach a resnet learner audio classification. I first need to learn how to create spectrograms from audio files.
import os
import matplotlib.pyplot as plt
#for loading and visualizing audio files
import librosa
import librosa.display
#to play audio
import IPython.display as ipd
# Relative path of the folder that holds the audio clips.
audio_fpath = "chords/"
# NOTE: os.listdir returns entries in arbitrary order, so the position of a
# given clip in this list is not guaranteed to be the same on every machine.
audio_clips = os.listdir(audio_fpath)
# Print every file name so the index of a particular clip can be looked up.
for path in audio_clips:
    print(path)
# Load the clip at index 2 at a 44.1 kHz sample rate and plot its waveform.
x, sr = librosa.load(os.path.join(audio_fpath, audio_clips[2]), sr=44100)
plt.figure(figsize=(14, 5))
plt.title('D minor Waveform - Piano')
# librosa.display.waveplot was removed in librosa 0.10 in favour of waveshow;
# use the newer function when it exists and fall back to the old one otherwise
# so the cell runs on both old and current librosa releases.
_plot_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
_plot_wave(x, sr=sr)
# Short-time Fourier transform of the signal loaded above, then convert the
# absolute values of the result to a decibel scale for display.
X = librosa.stft(x)
Xdb = librosa.amplitude_to_db(abs(X))
plt.figure(figsize=(14, 5))
# NOTE(review): "Spectogram" in the displayed title is a misspelling of
# "Spectrogram".
plt.title('D minor Spectogram - Piano')
# Draw the dB matrix with time on the x axis and frequency (Hz) on the y axis.
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
# Restrict the displayed frequency range to 0-12500 Hz.
plt.ylim(0,12500)
# With no arguments, the colorbar attaches to the current image, i.e. the
# spectrogram drawn just above.
plt.colorbar()
# Draw one spectrogram per audio clip in a two-column grid of subplots.
#
# A fresh 20x30 inch figure is created instead of reusing figure number 1, so
# the grid can never be drawn on top of an earlier plot that still owns that
# number (which happens when this runs as a plain script rather than in
# separate notebook cells).
fig = plt.figure(figsize=(20, 30))
n_cols = 2
# Keep the original 7x2 layout for up to 14 clips; grow the number of rows
# only when there are more clips than that layout can hold.
n_rows = max(7, (len(audio_clips) + n_cols - 1) // n_cols)
for i, clip in enumerate(audio_clips):
    # Load the clip that belongs to this subplot (previously index 2 was
    # loaded on every iteration, so all subplots showed the same audio).
    x, sr = librosa.load(os.path.join(audio_fpath, clip), sr=44100)
    X = librosa.stft(x)
    Xdb = librosa.amplitude_to_db(abs(X))
    ax = fig.add_subplot(n_rows, n_cols, i + 1)
    # Title is the file name up to its first "."; partition() returns the
    # whole name when there is no dot instead of chopping the last character.
    ax.title.set_text(clip.partition(".")[0] + " Spectrogram - Piano")
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz', ax=ax)
    # Limit the frequency range on this subplot's own axes, after drawing,
    # so every subplot (including the last one) gets the 0-12500 Hz range.
    ax.set_ylim(0, 12500)
# Adjust spacing once, after all subplots exist.
fig.tight_layout()