# Wave Wizard — Gradio app: upload audio files (or point at a folder) and get
# an HTML report with waveform, spectral features, spectrum, spectrogram,
# amplitude histogram, and estimates of real sample rate / effective bit depth.
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import soundfile as sf
import gradio as gr
import io
import os
import base64
def _fig_to_img_html(fig, alt):
    """Render *fig* as an inline base64 PNG ``<img>`` tag and close the figure.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        The figure to serialize; it is closed before returning.
    alt : str
        Alt text for the generated ``<img>`` element.

    Returns
    -------
    str
        An ``<img src="data:image/png;base64,...">`` HTML fragment.
    """
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight')
    plt.close(fig)  # free the figure immediately; we may render many files
    buf.seek(0)
    encoded = base64.b64encode(buf.read()).decode('utf-8')
    return f'<img src="data:image/png;base64,{encoded}" alt="{alt}">'


def analyze_audio_files(files, folder_path):
    """Analyze one or more audio files and return an aggregated HTML report.

    Parameters
    ----------
    files : list[str] | None
        File paths (e.g. from a Gradio ``Files`` input with ``type="filepath"``).
    folder_path : str | None
        Optional directory; every regular file inside is analyzed as well.

    Returns
    -------
    str
        HTML with per-file stats tables and embedded plots. Per-file errors
        are reported inline rather than aborting the whole batch.
    """
    output_html = ""
    file_paths = []

    # Merge both input channels: explicit uploads and a folder scan.
    if files:
        file_paths.extend(files)
    if folder_path:
        if os.path.isdir(folder_path):
            # Sort for a deterministic report order (os.listdir order is
            # filesystem-dependent).
            folder_files = sorted(
                os.path.join(folder_path, f) for f in os.listdir(folder_path)
                if os.path.isfile(os.path.join(folder_path, f)))
            file_paths.extend(folder_files)
        else:
            return f"<p><strong>Folder not found:</strong> {folder_path}</p>"

    for audio_file in file_paths:
        try:
            # Load at native sample rate (sr=None disables resampling).
            y, sr = librosa.load(audio_file, sr=None)

            # Bit depth as declared by the container, from soundfile metadata.
            with sf.SoundFile(audio_file) as f:
                bit_depth_info = f.subtype_info

            # Time-domain stats.
            duration = len(y) / sr

            # Choose n_fft for ~10 Hz frequency resolution, rounded up to a
            # power of two and clamped to a sane range to bound compute cost.
            desired_freq_resolution = 10.0  # Hz
            n_fft = int(sr / desired_freq_resolution)
            n_fft = 2 ** int(np.ceil(np.log2(n_fft)))  # next power of two
            max_n_fft = 32768
            min_n_fft = 1024
            n_fft = min(max(n_fft, min_n_fft), max_n_fft)
            hop_length = n_fft // 4

            # Magnitude STFT, dB spectrogram, and time-averaged spectrum.
            S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
            S_db = librosa.amplitude_to_db(S, ref=np.max)
            S_mean = np.mean(S, axis=1)
            freqs = np.linspace(0, sr / 2, len(S_mean))

            # --- Waveform plot ---
            fig_waveform = plt.figure(figsize=(8, 4))
            librosa.display.waveshow(y, sr=sr, alpha=0.5)
            plt.title('Waveform', fontsize=14)
            plt.xlabel('Time (s)', fontsize=12)
            plt.ylabel('Amplitude', fontsize=12)
            plt.tight_layout()
            waveform_html = _fig_to_img_html(fig_waveform, 'Waveform')

            # --- Spectral features: centroid, bandwidth, rolloff ---
            spectral_centroids = librosa.feature.spectral_centroid(
                y=y, sr=sr)[0]
            spectral_bandwidth = librosa.feature.spectral_bandwidth(
                y=y, sr=sr)[0]
            spectral_rolloff = librosa.feature.spectral_rolloff(
                y=y, sr=sr, roll_percent=0.85)[0]
            times = librosa.times_like(spectral_centroids)

            fig_spectral_features = plt.figure(figsize=(8, 4))
            plt.semilogy(times, spectral_centroids, label='Spectral Centroid')
            plt.semilogy(times, spectral_bandwidth, label='Spectral Bandwidth')
            plt.semilogy(times, spectral_rolloff,
                         label='Spectral Rolloff', linestyle='--')
            plt.title('Spectral Features', fontsize=14)
            plt.xlabel('Time (s)', fontsize=12)
            plt.ylabel('Hz', fontsize=12)
            plt.legend(loc='upper right')
            plt.tight_layout()
            spectral_features_html = _fig_to_img_html(
                fig_spectral_features, 'Spectral Features')

            # --- Frequency spectrum (time-averaged, log-log) ---
            fig1 = plt.figure(figsize=(8, 4))
            plt.semilogx(freqs, 20 * np.log10(S_mean + 1e-10))  # avoid log(0)
            plt.xlabel('Frequency (Hz)', fontsize=12)
            plt.ylabel('Amplitude (dB)', fontsize=12)
            plt.title('Frequency Spectrum', fontsize=14)
            plt.grid(True, which='both', ls='--')
            plt.xlim(20, sr / 2)
            plt.tight_layout()
            spectrum_html = _fig_to_img_html(fig1, 'Frequency Spectrum')

            # --- Spectrogram ---
            fig3 = plt.figure(figsize=(8, 4))
            librosa.display.specshow(
                S_db, sr=sr, x_axis='time', y_axis='linear',
                hop_length=hop_length)
            plt.colorbar(format='%+2.0f dB')
            plt.title('Spectrogram', fontsize=14)
            plt.xlabel('Time (s)', fontsize=12)
            plt.ylabel('Frequency (Hz)', fontsize=12)
            plt.tight_layout()
            spectrogram_html = _fig_to_img_html(fig3, 'Spectrogram')

            # High-frequency content: find the highest bin whose average
            # magnitude is within `threshold_db` of the spectrum peak, then
            # back out an "effective" sample rate via Nyquist.
            threshold_db = -80  # dB below peak
            max_amplitude_db = 20 * np.log10(np.max(S_mean + 1e-10))
            threshold_amplitude_db = max_amplitude_db + threshold_db
            threshold_amplitude = 10 ** (threshold_amplitude_db / 20)
            significant_indices = np.where(S_mean >= threshold_amplitude)[0]
            if len(significant_indices) > 0:
                highest_freq = freqs[significant_indices[-1]]
                estimated_sample_rate = highest_freq * 2  # Nyquist theorem
                significant_freq_text = f"{highest_freq:.2f} Hz"
                estimated_sample_rate_text = f"{estimated_sample_rate / 1000:.2f} kHz"
            else:
                significant_freq_text = "No significant frequency content detected."
                estimated_sample_rate_text = "N/A"

            # Effective bit depth from dynamic range (RMS vs. a near-silence
            # percentile), at ~6.02 dB per bit. The 1e-10 guards division by
            # zero on digitally-silent material.
            signal_rms = np.sqrt(np.mean(y ** 2))
            noise_floor = np.percentile(np.abs(y), 0.1)
            dynamic_range_db = 20 * \
                np.log10(signal_rms / (noise_floor + 1e-10))
            estimated_bit_depth = int(np.ceil(dynamic_range_db / 6.02))

            # Per-file stats rendered as an HTML table.
            output_text = f"""
            <h3 style="font-size:22px;">{os.path.basename(audio_file)}</h3>
            <table style="font-size:18px;">
            <tr><td><strong>File Bit Depth:</strong></td><td>{bit_depth_info}</td></tr>
            <tr><td><strong>Sample Rate:</strong></td><td>{sr} Hz</td></tr>
            <tr><td><strong>Duration:</strong></td><td>{duration:.2f} seconds</td></tr>
            <tr><td><strong>Using n_fft =</strong></td><td>{n_fft}</td></tr>
            <tr><td><strong>Significant frequency content up to:</strong></td><td>{significant_freq_text}</td></tr>
            <tr><td><strong>Estimated Real Sample Rate:</strong></td><td>{estimated_sample_rate_text}</td></tr>
            <tr><td><strong>Estimated Dynamic Range:</strong></td><td>{dynamic_range_db:.2f} dB</td></tr>
            <tr><td><strong>Estimated Effective Bit Depth:</strong></td><td>{estimated_bit_depth} bits PCM</td></tr>
            </table>
            """

            # --- Histogram of sample amplitudes (log counts) ---
            fig2 = plt.figure(figsize=(8, 4))
            plt.hist(y, bins=1000, alpha=0.7, color='blue',
                     edgecolor='black', log=True)
            plt.xlabel('Amplitude', fontsize=12)
            plt.ylabel('Count (log scale)', fontsize=12)
            plt.title('Histogram of Sample Amplitudes', fontsize=14)
            plt.grid(True)
            plt.tight_layout()
            histogram_html = _fig_to_img_html(
                fig2, 'Histogram of Sample Amplitudes')

            # Stitch this file's section into the aggregate report.
            output_html += f"""
            {output_text}
            <h4 style="font-size:20px;">Waveform</h4>
            {waveform_html}
            <h4 style="font-size:20px;">Spectral Features</h4>
            {spectral_features_html}
            <h4 style="font-size:20px;">Frequency Spectrum</h4>
            {spectrum_html}
            <h4 style="font-size:20px;">Spectrogram</h4>
            {spectrogram_html}
            <h4 style="font-size:20px;">Histogram of Sample Amplitudes</h4>
            {histogram_html}
            <hr>
            """
        except Exception as e:
            # One bad file must not abort the batch; report it inline.
            output_html += f"<p><strong>File:</strong> {os.path.basename(audio_file)}</p><p><strong>Error:</strong> {str(e)}</p><hr>"

    return output_html
# Gradio UI: two inputs (uploaded files and/or a folder path), one button,
# and an HTML panel for the aggregated report.
with gr.Blocks() as demo:
    gr.Markdown("Wave Wizard")
    gr.Markdown(
        "Upload one or more audio files, or specify a folder containing audio files.")
    with gr.Row():
        file_input = gr.Files(label="Upload Audio Files",
                              type="filepath", file_count="multiple")
        folder_input = gr.Textbox(label="Folder Path (optional)",
                                  placeholder="Enter folder path containing audio files")
    analyze_button = gr.Button("Analyze")
    output_display = gr.HTML()
    # Wire the button directly to the analysis function; the previous
    # analyze_wrapper was a redundant pass-through.
    analyze_button.click(analyze_audio_files,
                         inputs=[file_input, folder_input],
                         outputs=output_display)

demo.launch()