# WaveWizard / app.py
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import soundfile as sf
import gradio as gr
import io
import os
import base64
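
# Wave Wizard: a small Gradio app that inspects audio files and builds an HTML report.
# For each file it reads the declared sample rate, duration, and bit depth, estimates the
# "real" bandwidth and effective bit depth from the signal itself, and embeds waveform,
# spectral-feature, spectrum, spectrogram, and amplitude-histogram plots as base64 PNGs.
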
def analyze_audio_files(files, folder_path):
    output_html = ""
    file_paths = []

    # Handle inputs: files can be a list of file paths or a folder path
    if files:
        file_paths.extend(files)
    if folder_path:
        if os.path.isdir(folder_path):
            folder_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path)
                            if os.path.isfile(os.path.join(folder_path, f))]
            file_paths.extend(folder_files)
        else:
            return f"<p><strong>Folder not found:</strong> {folder_path}</p>"

    for audio_file in file_paths:
        try:
            # Load the audio file at its native sample rate
            y, sr = librosa.load(audio_file, sr=None)

            # Get the original bit depth from the file metadata
            with sf.SoundFile(audio_file) as f:
                bit_depth_info = f.subtype_info

            # Time-domain analysis
            duration = len(y) / sr

            # Frequency-domain analysis
            desired_freq_resolution = 10.0  # in Hz

            # Calculate n_fft from the desired resolution, limited to a reasonable range
            n_fft = int(sr / desired_freq_resolution)
            n_fft = 2 ** int(np.ceil(np.log2(n_fft)))  # Next power of two
            # Set maximum and minimum n_fft to avoid excessive computation
            max_n_fft = 32768
            min_n_fft = 1024
            n_fft = min(max(n_fft, min_n_fft), max_n_fft)
            hop_length = n_fft // 4
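            # A hop of n_fft // 4 (75% frame overlap) mirrors librosa's default and keeps
            # the spectrogram reasonably smooth in time without excessive computation.
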
            # Compute the Short-Time Fourier Transform (STFT)
            S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
            # Compute the spectrogram (in dB)
            S_db = librosa.amplitude_to_db(S, ref=np.max)
            # Average over time to get the frequency spectrum
            S_mean = np.mean(S, axis=1)
            freqs = np.linspace(0, sr / 2, len(S_mean))
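            # Averaging the magnitude STFT over time gives a long-term spectrum; the bins
            # span 0 Hz up to the Nyquist frequency (sr / 2).
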
            # Plot the waveform
            fig_waveform = plt.figure(figsize=(8, 4))
            librosa.display.waveshow(y, sr=sr, alpha=0.5)
            plt.title('Waveform', fontsize=14)
            plt.xlabel('Time (s)', fontsize=12)
            plt.ylabel('Amplitude', fontsize=12)
            plt.tight_layout()
            waveform_image = io.BytesIO()
            plt.savefig(waveform_image, format='png', bbox_inches='tight')
            plt.close(fig_waveform)
            waveform_image.seek(0)
            waveform_base64 = base64.b64encode(waveform_image.read()).decode('utf-8')
            waveform_html = f'<img src="data:image/png;base64,{waveform_base64}" alt="Waveform">'
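
            # Each plot below follows the same pattern: draw with matplotlib, save it to an
            # in-memory PNG, and embed it in the report as a base64 data URI so the returned
            # HTML needs no external files.
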
            # Calculate spectral features: spectral centroid, spectral bandwidth, and spectral rolloff
            spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
            spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
            spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.85)[0]
            # Pass sr so frame times are correct for files not sampled at librosa's 22050 Hz default
            times = librosa.times_like(spectral_centroids, sr=sr)
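            # times_like converts feature frame indices to seconds from the sample rate and
            # the feature's hop length (librosa's default of 512 samples here).
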
            # Plot the spectral features
            fig_spectral_features = plt.figure(figsize=(8, 4))
            plt.semilogy(times, spectral_centroids, label='Spectral Centroid')
            plt.semilogy(times, spectral_bandwidth, label='Spectral Bandwidth')
            plt.semilogy(times, spectral_rolloff, label='Spectral Rolloff', linestyle='--')
            plt.title('Spectral Features', fontsize=14)
            plt.xlabel('Time (s)', fontsize=12)
            plt.ylabel('Hz', fontsize=12)
            plt.legend(loc='upper right')
            plt.tight_layout()
            spectral_features_image = io.BytesIO()
            plt.savefig(spectral_features_image, format='png', bbox_inches='tight')
            plt.close(fig_spectral_features)
            spectral_features_image.seek(0)
            spectral_features_base64 = base64.b64encode(spectral_features_image.read()).decode('utf-8')
            spectral_features_html = f'<img src="data:image/png;base64,{spectral_features_base64}" alt="Spectral Features">'

            # Plot the frequency spectrum
            fig_spectrum = plt.figure(figsize=(8, 4))
            plt.semilogx(freqs, 20 * np.log10(S_mean + 1e-10))  # Avoid log(0)
            plt.xlabel('Frequency (Hz)', fontsize=12)
            plt.ylabel('Amplitude (dB)', fontsize=12)
            plt.title('Frequency Spectrum', fontsize=14)
            plt.grid(True, which='both', ls='--')
            plt.xlim(20, sr / 2)
            plt.tight_layout()
            spectrum_image = io.BytesIO()
            plt.savefig(spectrum_image, format='png', bbox_inches='tight')
            plt.close(fig_spectrum)
            spectrum_image.seek(0)
            spectrum_base64 = base64.b64encode(spectrum_image.read()).decode('utf-8')
            spectrum_html = f'<img src="data:image/png;base64,{spectrum_base64}" alt="Frequency Spectrum">'

            # Plot the spectrogram
            fig_spectrogram = plt.figure(figsize=(8, 4))
            librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='linear', hop_length=hop_length)
            plt.colorbar(format='%+2.0f dB')
            plt.title('Spectrogram', fontsize=14)
            plt.xlabel('Time (s)', fontsize=12)
            plt.ylabel('Frequency (Hz)', fontsize=12)
            plt.tight_layout()
            spectrogram_image = io.BytesIO()
            plt.savefig(spectrogram_image, format='png', bbox_inches='tight')
            plt.close(fig_spectrogram)
            spectrogram_image.seek(0)
            spectrogram_base64 = base64.b64encode(spectrogram_image.read()).decode('utf-8')
            spectrogram_html = f'<img src="data:image/png;base64,{spectrogram_base64}" alt="Spectrogram">'

            # Analyze high-frequency content
            # Define a threshold relative to the maximum amplitude
            threshold_db = -80  # dB
            max_amplitude_db = 20 * np.log10(np.max(S_mean + 1e-10))
            threshold_amplitude_db = max_amplitude_db + threshold_db
            threshold_amplitude = 10 ** (threshold_amplitude_db / 20)
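            # The threshold sits 80 dB below the peak of the averaged spectrum and is
            # converted back to a linear amplitude so it can be compared against S_mean.
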
            # Find the highest frequency with significant content
            significant_indices = np.where(S_mean >= threshold_amplitude)[0]
            if len(significant_indices) > 0:
                highest_freq = freqs[significant_indices[-1]]
                # Estimate the real sample rate
                estimated_sample_rate = highest_freq * 2  # Nyquist theorem
                significant_freq_text = f"{highest_freq:.2f} Hz"
                estimated_sample_rate_text = f"{estimated_sample_rate / 1000:.2f} kHz"
            else:
                significant_freq_text = "No significant frequency content detected."
                estimated_sample_rate_text = "N/A"
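            # If the content stops well below Nyquist, the file was likely produced at, or
            # upsampled from, a lower sample rate than its container advertises.
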
            # Estimate effective bit depth
            # Calculate the signal's dynamic range
            signal_rms = np.sqrt(np.mean(y ** 2))
            noise_floor = np.percentile(np.abs(y), 0.1)
            # Avoid division by zero
            dynamic_range_db = 20 * np.log10(signal_rms / (noise_floor + 1e-10))
            estimated_bit_depth = int(np.ceil(dynamic_range_db / 6.02))
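            # Rule of thumb: each PCM bit contributes roughly 6.02 dB (20*log10(2)) of
            # dynamic range, so the measured range divided by 6.02 approximates the
            # effective bit depth.
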
            # Prepare the output text as an HTML table
            output_text = f"""
            <h3 style="font-size:22px;">{os.path.basename(audio_file)}</h3>
            <table style="font-size:18px;">
                <tr><td><strong>File Bit Depth:</strong></td><td>{bit_depth_info}</td></tr>
                <tr><td><strong>Sample Rate:</strong></td><td>{sr} Hz</td></tr>
                <tr><td><strong>Duration:</strong></td><td>{duration:.2f} seconds</td></tr>
                <tr><td><strong>Using n_fft =</strong></td><td>{n_fft}</td></tr>
                <tr><td><strong>Significant frequency content up to:</strong></td><td>{significant_freq_text}</td></tr>
                <tr><td><strong>Estimated Real Sample Rate:</strong></td><td>{estimated_sample_rate_text}</td></tr>
                <tr><td><strong>Estimated Dynamic Range:</strong></td><td>{dynamic_range_db:.2f} dB</td></tr>
                <tr><td><strong>Estimated Effective Bit Depth:</strong></td><td>{estimated_bit_depth} bits PCM</td></tr>
            </table>
            """

            # Plot histogram of sample values
            fig_histogram = plt.figure(figsize=(8, 4))
            plt.hist(y, bins=1000, alpha=0.7, color='blue', edgecolor='black', log=True)
            plt.xlabel('Amplitude', fontsize=12)
            plt.ylabel('Count (log scale)', fontsize=12)
            plt.title('Histogram of Sample Amplitudes', fontsize=14)
            plt.grid(True)
            plt.tight_layout()
            histogram_image = io.BytesIO()
            plt.savefig(histogram_image, format='png', bbox_inches='tight')
            plt.close(fig_histogram)
            histogram_image.seek(0)
            histogram_base64 = base64.b64encode(histogram_image.read()).decode('utf-8')
            histogram_html = f'<img src="data:image/png;base64,{histogram_base64}" alt="Histogram of Sample Amplitudes">'

            # Combine text and images into HTML
            output_html += f"""
            {output_text}
            <h4 style="font-size:20px;">Waveform</h4>
            {waveform_html}
            <h4 style="font-size:20px;">Spectral Features</h4>
            {spectral_features_html}
            <h4 style="font-size:20px;">Frequency Spectrum</h4>
            {spectrum_html}
            <h4 style="font-size:20px;">Spectrogram</h4>
            {spectrogram_html}
            <h4 style="font-size:20px;">Histogram of Sample Amplitudes</h4>
            {histogram_html}
            <hr>
            """
        except Exception as e:
            # Handle errors gracefully and report the failing file
            output_html += f"<p><strong>File:</strong> {os.path.basename(audio_file)}</p><p><strong>Error:</strong> {str(e)}</p><hr>"

    # Return the aggregated HTML output
    return output_html
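

# Gradio interface: accept uploaded files and/or a folder path, run the analysis on
# each file, and display the aggregated HTML report.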
with gr.Blocks() as demo:
    gr.Markdown("Wave Wizard")
    gr.Markdown("Upload one or more audio files, or specify a folder containing audio files.")

    with gr.Row():
        file_input = gr.Files(label="Upload Audio Files", type="filepath", file_count="multiple")
        folder_input = gr.Textbox(label="Folder Path (optional)",
                                  placeholder="Enter folder path containing audio files")

    analyze_button = gr.Button("Analyze")
    output_display = gr.HTML()

    def analyze_wrapper(files, folder_path):
        outputs = analyze_audio_files(files, folder_path)
        return outputs

    analyze_button.click(analyze_wrapper,
                         inputs=[file_input, folder_input],
                         outputs=output_display)

demo.launch()