"""Gradio app that analyzes audio files and renders an HTML report.

For every input file the report contains a summary table (container bit
depth, sample rate, duration, estimated real bandwidth and effective bit
depth) followed by five plots embedded as base64 PNG images.
"""

import base64
import html
import io
import os

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf

# Target width of one STFT frequency bin, in Hz.
DESIRED_FREQ_RESOLUTION_HZ = 10.0
# Bounds on the FFT size, to avoid excessive computation.
MIN_N_FFT = 1024
MAX_N_FFT = 32768
# Bins quieter than this (relative to the loudest bin) are not "significant".
SIGNIFICANCE_THRESHOLD_DB = -80
# Guards against log(0) and division by zero.
EPSILON = 1e-10
FIGURE_SIZE = (8, 4)


def _figure_to_img_tag(fig, alt_text):
    """Render *fig* to PNG and return it as an inline ``<img>`` tag.

    The figure is always closed, even when rendering fails, so a bad plot
    cannot leak matplotlib figures across requests.
    """
    buffer = io.BytesIO()
    try:
        fig.tight_layout()
        fig.savefig(buffer, format='png', bbox_inches='tight')
    finally:
        plt.close(fig)
    encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return (
        f'<img src="data:image/png;base64,{encoded}" '
        f'alt="{html.escape(alt_text)}">'
    )


def _choose_n_fft(sr):
    """Return a power-of-two FFT size giving roughly the desired resolution."""
    # max(..., 1) keeps log2 finite for degenerate (very low) sample rates.
    n_fft = max(int(sr / DESIRED_FREQ_RESOLUTION_HZ), 1)
    n_fft = 2 ** int(np.ceil(np.log2(n_fft)))  # Next power of two
    return min(max(n_fft, MIN_N_FFT), MAX_N_FFT)


def _highest_significant_frequency(S_mean, freqs):
    """Return the highest frequency whose mean magnitude is significant.

    A bin is significant when it lies within ``SIGNIFICANCE_THRESHOLD_DB`` of
    the loudest bin. Returns ``None`` when no bin qualifies.
    """
    max_amplitude_db = 20 * np.log10(np.max(S_mean + EPSILON))
    threshold_amplitude = 10 ** (
        (max_amplitude_db + SIGNIFICANCE_THRESHOLD_DB) / 20
    )
    significant_indices = np.flatnonzero(S_mean >= threshold_amplitude)
    if significant_indices.size == 0:
        return None
    return freqs[significant_indices[-1]]


def _estimate_dynamic_range_db(y):
    """Estimate dynamic range as RMS level over the 0.1th-percentile level.

    Returns 0.0 for silent (or empty) input, where the ratio has no finite
    logarithm and the later integer conversion would raise.
    """
    signal_rms = np.sqrt(np.mean(y ** 2))
    if not np.isfinite(signal_rms) or signal_rms <= 0:
        return 0.0
    noise_floor = np.percentile(np.abs(y), 0.1)
    return float(20 * np.log10(signal_rms / (noise_floor + EPSILON)))


def _plot_waveform(y, sr):
    """Return an ``<img>`` tag showing the time-domain waveform."""
    fig, ax = plt.subplots(figsize=FIGURE_SIZE)
    librosa.display.waveshow(y, sr=sr, alpha=0.5, ax=ax)
    ax.set_title('Waveform', fontsize=14)
    ax.set_xlabel('Time (s)', fontsize=12)
    ax.set_ylabel('Amplitude', fontsize=12)
    return _figure_to_img_tag(fig, 'Waveform')


def _plot_spectral_features(y, sr):
    """Return an ``<img>`` tag with centroid, bandwidth and rolloff curves."""
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(
        y=y, sr=sr, roll_percent=0.85)[0]
    # sr must be passed explicitly: the audio is loaded at its native rate,
    # while times_like otherwise assumes librosa's default of 22050 Hz.
    times = librosa.times_like(spectral_centroids, sr=sr)

    fig, ax = plt.subplots(figsize=FIGURE_SIZE)
    ax.semilogy(times, spectral_centroids, label='Spectral Centroid')
    ax.semilogy(times, spectral_bandwidth, label='Spectral Bandwidth')
    ax.semilogy(times, spectral_rolloff, label='Spectral Rolloff',
                linestyle='--')
    ax.set_title('Spectral Features', fontsize=14)
    ax.set_xlabel('Time (s)', fontsize=12)
    ax.set_ylabel('Hz', fontsize=12)
    ax.legend(loc='upper right')
    return _figure_to_img_tag(fig, 'Spectral Features')


def _plot_spectrum(S_mean, freqs, sr):
    """Return an ``<img>`` tag with the time-averaged magnitude spectrum."""
    fig, ax = plt.subplots(figsize=FIGURE_SIZE)
    ax.semilogx(freqs, 20 * np.log10(S_mean + EPSILON))  # Avoid log(0)
    ax.set_xlabel('Frequency (Hz)', fontsize=12)
    ax.set_ylabel('Amplitude (dB)', fontsize=12)
    ax.set_title('Frequency Spectrum', fontsize=14)
    ax.grid(True, which='both', ls='--')
    ax.set_xlim(20, sr / 2)
    return _figure_to_img_tag(fig, 'Frequency Spectrum')


def _plot_spectrogram(S_db, sr, hop_length):
    """Return an ``<img>`` tag with the dB spectrogram on a linear axis."""
    fig, ax = plt.subplots(figsize=FIGURE_SIZE)
    mesh = librosa.display.specshow(
        S_db, sr=sr, x_axis='time', y_axis='linear',
        hop_length=hop_length, ax=ax)
    fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
    ax.set_title('Spectrogram', fontsize=14)
    ax.set_xlabel('Time (s)', fontsize=12)
    ax.set_ylabel('Frequency (Hz)', fontsize=12)
    return _figure_to_img_tag(fig, 'Spectrogram')


def _plot_histogram(y):
    """Return an ``<img>`` tag with a log-count histogram of sample values."""
    fig, ax = plt.subplots(figsize=FIGURE_SIZE)
    ax.hist(y, bins=1000, alpha=0.7, color='blue', edgecolor='black',
            log=True)
    ax.set_xlabel('Amplitude', fontsize=12)
    ax.set_ylabel('Count (log scale)', fontsize=12)
    ax.set_title('Histogram of Sample Amplitudes', fontsize=14)
    ax.grid(True)
    return _figure_to_img_tag(fig, 'Histogram of Sample Amplitudes')


def _analyze_single_file(audio_file):
    """Analyze one audio file and return its HTML report section.

    Any exception from loading, analysis or plotting propagates to the
    caller, which turns it into a per-file error entry.
    """
    # Load the audio file at its native sample rate.
    y, sr = librosa.load(audio_file, sr=None)

    # Get the stored bit depth description from the file metadata.
    with sf.SoundFile(audio_file) as sound_file:
        bit_depth_info = sound_file.subtype_info

    duration = len(y) / sr

    # Frequency-domain analysis.
    n_fft = _choose_n_fft(sr)
    hop_length = n_fft // 4
    S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    S_db = librosa.amplitude_to_db(S, ref=np.max)
    S_mean = np.mean(S, axis=1)  # Average over time
    freqs = np.linspace(0, sr / 2, len(S_mean))

    # Highest frequency with significant content -> implied sample rate.
    highest_freq = _highest_significant_frequency(S_mean, freqs)
    if highest_freq is None:
        significant_freq_text = "No significant frequency content detected."
        estimated_sample_rate_text = "N/A"
    else:
        estimated_sample_rate = highest_freq * 2  # Nyquist theorem
        significant_freq_text = f"{highest_freq:.2f} Hz"
        estimated_sample_rate_text = f"{estimated_sample_rate / 1000:.2f} kHz"

    # Effective bit depth, at roughly 6.02 dB of dynamic range per bit.
    dynamic_range_db = _estimate_dynamic_range_db(y)
    estimated_bit_depth = int(np.ceil(dynamic_range_db / 6.02))

    summary_rows = [
        ("File Bit Depth:", bit_depth_info),
        ("Sample Rate:", f"{sr} Hz"),
        ("Duration:", f"{duration:.2f} seconds"),
        ("Using n_fft =", n_fft),
        ("Significant frequency content up to:", significant_freq_text),
        ("Estimated Real Sample Rate:", estimated_sample_rate_text),
        ("Estimated Dynamic Range:", f"{dynamic_range_db:.2f} dB"),
        ("Estimated Effective Bit Depth:", f"{estimated_bit_depth} bits PCM"),
    ]
    plots = [
        ("Waveform", _plot_waveform(y, sr)),
        ("Spectral Features", _plot_spectral_features(y, sr)),
        ("Frequency Spectrum", _plot_spectrum(S_mean, freqs, sr)),
        ("Spectrogram", _plot_spectrogram(S_db, sr, hop_length)),
        ("Histogram of Sample Amplitudes", _plot_histogram(y)),
    ]

    # NOTE(review): the report's original tags/styling are not present in this
    # file; the markup below is a minimal reconstruction -- confirm the layout.
    table_html = "".join(
        f"<tr><td><strong>{label}</strong></td>"
        f"<td>{html.escape(str(value))}</td></tr>"
        for label, value in summary_rows
    )
    plots_html = "".join(
        f"<h4>{title}</h4>{img_tag}" for title, img_tag in plots
    )
    return (
        "<div>"
        f"<h3>{html.escape(os.path.basename(audio_file))}</h3>"
        f"<table>{table_html}</table>"
        f"{plots_html}"
        "</div>"
    )


def analyze_audio_files(files, folder_path):
    """Analyze audio files and return one aggregated HTML report.

    Args:
        files: Iterable of audio file paths (as delivered by ``gr.Files``
            with ``type="filepath"``), or a falsy value when nothing was
            uploaded.
        folder_path: Optional directory; every regular file directly inside
            it is analyzed as well. Falsy values are ignored.

    Returns:
        An HTML string with one section per file. A file that fails to load
        or analyze contributes an error section instead of aborting the
        batch. If ``folder_path`` is given but is not a directory, only a
        "Folder not found" message is returned.
    """
    file_paths = list(files) if files else []

    if folder_path:
        if not os.path.isdir(folder_path):
            return f"<p>Folder not found: {html.escape(folder_path)}</p>"
        # Sorted so the report order does not depend on directory order.
        candidates = (
            os.path.join(folder_path, name)
            for name in sorted(os.listdir(folder_path))
        )
        file_paths.extend(path for path in candidates if os.path.isfile(path))

    sections = []
    for audio_file in file_paths:
        try:
            sections.append(_analyze_single_file(audio_file))
        except Exception as exc:
            # Batch boundary: report the failure and continue with the
            # remaining files rather than losing the whole report.
            sections.append(
                "<div>"
                f"<h3>File: {html.escape(os.path.basename(audio_file))}</h3>"
                f"<p>Error: {html.escape(str(exc))}</p>"
                "</div>"
            )

    # Return the aggregated HTML output
    return "".join(sections)


with gr.Blocks() as demo:
    gr.Markdown("Wave Wizard")
    gr.Markdown(
        "Upload one or more audio files, or specify a folder containing audio files.")
    with gr.Row():
        file_input = gr.Files(label="Upload Audio Files",
                              type="filepath", file_count="multiple")
        folder_input = gr.Textbox(
            label="Folder Path (optional)",
            placeholder="Enter folder path containing audio files")
    analyze_button = gr.Button("Analyze")
    output_display = gr.HTML()

    def analyze_wrapper(files, folder_path):
        """Gradio click handler: forward the inputs to the analyzer."""
        outputs = analyze_audio_files(files, folder_path)
        return outputs

    analyze_button.click(analyze_wrapper, inputs=[
                         file_input, folder_input], outputs=output_display)

# Only start the server when executed as a script, so importing this module
# (e.g. to reuse analyze_audio_files) has no side effects.
if __name__ == "__main__":
    demo.launch()