jackvinati committed on
Commit
141afc1
1 Parent(s): 912effa

wavewizard_app.py

Browse files
Files changed (1) hide show
  1. app.py +198 -0
app.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import librosa
3
+ import librosa.display
4
+ import matplotlib.pyplot as plt
5
+ import soundfile as sf
6
+ import gradio as gr
7
+ import io
8
+ import os
9
+ import base64
10
+
11
+
12
def _figure_to_img_html(fig, alt_text):
    """Render *fig* as a PNG and return it as an inline base64 ``<img>`` tag."""
    buffer = io.BytesIO()
    fig.savefig(buffer, format='png', bbox_inches='tight')
    encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return f'<img src="data:image/png;base64,{encoded}" alt="{alt_text}">'


def _choose_n_fft(sr, desired_freq_resolution=10.0,
                  min_n_fft=1024, max_n_fft=32768):
    """Pick a power-of-two FFT size giving roughly the desired resolution.

    The result is clamped to ``[min_n_fft, max_n_fft]`` to avoid excessive
    computation.
    """
    # Guard against int(sr / resolution) == 0, which would feed 0 to log2.
    n_fft = max(int(sr / desired_freq_resolution), 1)
    n_fft = 2 ** int(np.ceil(np.log2(n_fft)))  # next power of two
    return min(max(n_fft, min_n_fft), max_n_fft)


def _plot_spectrum_html(freqs, S_mean, sr):
    """Plot the time-averaged spectrum (dB, log frequency axis) as ``<img>``."""
    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        ax.semilogx(freqs, 20 * np.log10(S_mean + 1e-10))  # avoid log(0)
        ax.set_xlabel('Frequency (Hz)', fontsize=12)
        ax.set_ylabel('Amplitude (dB)', fontsize=12)
        ax.set_title('Frequency Spectrum', fontsize=14)
        ax.grid(True, which='both', ls='--')
        ax.set_xlim(20, sr / 2)
        fig.tight_layout()
        return _figure_to_img_html(fig, "Frequency Spectrum")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)


def _plot_spectrogram_html(S_db, sr, hop_length):
    """Plot the dB spectrogram (linear frequency axis) as ``<img>``."""
    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        mesh = librosa.display.specshow(
            S_db, sr=sr, x_axis='time', y_axis='linear',
            hop_length=hop_length, ax=ax)
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram', fontsize=14)
        ax.set_xlabel('Time (s)', fontsize=12)
        ax.set_ylabel('Frequency (Hz)', fontsize=12)
        fig.tight_layout()
        return _figure_to_img_html(fig, "Spectrogram")
    finally:
        plt.close(fig)


def _plot_histogram_html(y):
    """Plot a log-count histogram of the sample values as ``<img>``."""
    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        ax.hist(y, bins=1000, alpha=0.7, color='blue',
                edgecolor='black', log=True)
        ax.set_xlabel('Amplitude', fontsize=12)
        ax.set_ylabel('Count (log scale)', fontsize=12)
        ax.set_title('Histogram of Sample Amplitudes', fontsize=14)
        ax.grid(True)
        fig.tight_layout()
        return _figure_to_img_html(fig, "Histogram of Sample Amplitudes")
    finally:
        plt.close(fig)


def _highest_significant_freq(S_mean, freqs, threshold_db=-80):
    """Return the highest frequency within *threshold_db* of the spectral peak.

    Returns ``None`` when no bin of the averaged spectrum reaches the
    threshold (e.g. an all-zero spectrum).
    """
    max_amplitude_db = 20 * np.log10(np.max(S_mean + 1e-10))
    threshold_amplitude = 10 ** ((max_amplitude_db + threshold_db) / 20)
    significant = np.flatnonzero(S_mean >= threshold_amplitude)
    if significant.size == 0:
        return None
    return freqs[significant[-1]]


def _estimate_dynamic_range_db(y):
    """Estimate dynamic range as RMS level over the 0.1th-percentile level.

    A completely silent signal yields 0.0 instead of ``-inf``, which would
    otherwise make the later integer conversion raise ``OverflowError``.
    """
    signal_rms = np.sqrt(np.mean(y ** 2))
    if signal_rms == 0:
        return 0.0
    noise_floor = np.percentile(np.abs(y), 0.1)
    # The 1e-10 offset avoids division by zero; when the percentile itself is
    # 0 the ratio is dominated by that offset and the estimate is very large.
    return 20 * np.log10(signal_rms / (noise_floor + 1e-10))


def _analyze_single_file(audio_file):
    """Analyze one audio file and return its HTML report fragment.

    Any exception raised by loading, analysis or plotting propagates to the
    caller, which reports it per file.
    """
    import html  # function-scope: not among the file's top-level imports

    # sr=None: keep the file's own sample rate (per librosa's documentation).
    y, sr = librosa.load(audio_file, sr=None)

    # Storage format description taken from the file metadata.
    with sf.SoundFile(audio_file) as f:
        bit_depth_info = f.subtype_info

    duration = len(y) / sr
    n_fft = _choose_n_fft(sr)
    hop_length = n_fft // 4

    # Magnitude STFT, its dB version for display, and the time average.
    S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    S_db = librosa.amplitude_to_db(S, ref=np.max)
    S_mean = np.mean(S, axis=1)
    freqs = np.linspace(0, sr / 2, len(S_mean))

    highest_freq = _highest_significant_freq(S_mean, freqs)
    if highest_freq is None:
        significant_freq_text = "No significant frequency content detected."
        estimated_sample_rate_text = "N/A"
    else:
        # Nyquist: content up to f implies a sample rate of at least 2 * f.
        estimated_sample_rate = highest_freq * 2
        significant_freq_text = f"{highest_freq:.2f} Hz"
        estimated_sample_rate_text = f"{estimated_sample_rate / 1000:.2f} kHz"

    dynamic_range_db = _estimate_dynamic_range_db(y)
    # About 6.02 dB of dynamic range per bit of PCM resolution.
    estimated_bit_depth = int(np.ceil(dynamic_range_db / 6.02))

    spectrum_html = _plot_spectrum_html(freqs, S_mean, sr)
    spectrogram_html = _plot_spectrogram_html(S_db, sr, hop_length)
    histogram_html = _plot_histogram_html(y)

    # File names and metadata are untrusted text: escape before embedding.
    safe_name = html.escape(os.path.basename(audio_file))
    safe_bit_depth = html.escape(str(bit_depth_info))

    output_text = f"""
    <h3 style="font-size:22px;">{safe_name}</h3>
    <table style="font-size:18px;">
    <tr><td><strong>File Bit Depth:</strong></td><td>{safe_bit_depth}</td></tr>
    <tr><td><strong>Sample Rate:</strong></td><td>{sr} Hz</td></tr>
    <tr><td><strong>Duration:</strong></td><td>{duration:.2f} seconds</td></tr>
    <tr><td><strong>Using n_fft =</strong></td><td>{n_fft}</td></tr>
    <tr><td><strong>Significant frequency content up to:</strong></td><td>{significant_freq_text}</td></tr>
    <tr><td><strong>Estimated Real Sample Rate:</strong></td><td>{estimated_sample_rate_text}</td></tr>
    <tr><td><strong>Estimated Dynamic Range:</strong></td><td>{dynamic_range_db:.2f} dB</td></tr>
    <tr><td><strong>Estimated Effective Bit Depth:</strong></td><td>{estimated_bit_depth} bits PCM</td></tr>
    </table>
    """

    return f"""
    {output_text}
    <h4 style="font-size:20px;">Frequency Spectrum</h4>
    {spectrum_html}
    <h4 style="font-size:20px;">Spectrogram</h4>
    {spectrogram_html}
    <h4 style="font-size:20px;">Histogram of Sample Amplitudes</h4>
    {histogram_html}
    <hr>
    """


def analyze_audio_files(files, folder_path):
    """Analyze audio files and return one aggregated HTML report.

    Args:
        files: Iterable of audio file paths, or a falsy value for none.
        folder_path: Optional directory whose regular files are analyzed in
            addition to *files*; a falsy value disables it.

    Returns:
        An HTML string. For every file it contains either a summary table
        with spectrum, spectrogram and histogram images, or an error entry.
        If *folder_path* is given but is not a directory, only a
        "Folder not found" message is returned. If there is nothing to
        analyze, a short notice is returned.

    File names, the folder path and exception messages are HTML-escaped
    before being embedded in the report.
    """
    import html  # function-scope: not among the file's top-level imports

    file_paths = []
    if files:
        file_paths.extend(files)

    if folder_path:
        # NOTE(review): this path is read on the machine running the app;
        # confirm that exposing directory listings to app users is intended.
        if not os.path.isdir(folder_path):
            return ("<p><strong>Folder not found:</strong> "
                    f"{html.escape(str(folder_path))}</p>")
        # Sort so the report order does not depend on os.listdir ordering.
        candidates = (os.path.join(folder_path, name)
                      for name in sorted(os.listdir(folder_path)))
        file_paths.extend(path for path in candidates if os.path.isfile(path))

    if not file_paths:
        return "<p>No audio files to analyze.</p>"

    fragments = []
    for audio_file in file_paths:
        try:
            fragments.append(_analyze_single_file(audio_file))
        except Exception as e:
            # Per-file boundary: one unreadable file must not abort the rest.
            safe_name = html.escape(os.path.basename(str(audio_file)))
            fragments.append(
                f"<p><strong>File:</strong> {safe_name}</p>"
                f"<p><strong>Error:</strong> {html.escape(str(e))}</p><hr>")

    # Return the aggregated HTML output
    return "".join(fragments)
177
+
178
+
179
# Gradio front end: takes uploaded files and/or a folder path and shows the
# HTML report returned by analyze_audio_files.
with gr.Blocks() as demo:
    gr.Markdown("Wave Wizard")
    gr.Markdown(
        "Upload one or more audio files, or specify a folder containing audio files."
    )

    # The two input widgets sit side by side.
    with gr.Row():
        file_input = gr.Files(
            label="Upload Audio Files",
            type="filepath",
            file_count="multiple",
        )
        folder_input = gr.Textbox(
            label="Folder Path (optional)",
            placeholder="Enter folder path containing audio files",
        )

    analyze_button = gr.Button("Analyze")
    output_display = gr.HTML()

    def analyze_wrapper(files, folder_path):
        """Pass both inputs to analyze_audio_files and return its HTML."""
        return analyze_audio_files(files, folder_path)

    # Clicking the button runs the analysis and fills the HTML component.
    analyze_button.click(
        analyze_wrapper,
        inputs=[file_input, folder_input],
        outputs=output_display,
    )

demo.launch()