import gradio as gr
import torch
import spaces  # enables ZeroGPU support when this app runs on Hugging Face Spaces

from audio_processing import process_audio, print_results  # print_results is currently unused here

# Report the compute device at startup so the logs show what we are running on.
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print("No CUDA GPUs available. Running on CPU.")


def transcribe_audio(audio_file, translate, model_size):
    """Run the WhisperX pipeline on an audio file and format the results as plain text."""
    language_segments, final_segments = process_audio(
        audio_file, translate=translate, model_size=model_size
    )

    # Summarize where the detected language changes over the course of the audio.
    output = "Detected language changes:\n\n"
    for segment in language_segments:
        output += f"Language: {segment['language']}\n"
        output += f"Time: {segment['start']:.2f}s - {segment['end']:.2f}s\n\n"

    # Per-segment transcription, tagged with time range, language, and speaker.
    output += f"Transcription with language detection and speaker diarization (using {model_size} model):\n\n"
    for segment in final_segments:
        output += f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}) {segment['speaker']}:\n"
        output += f"Original: {segment['text']}\n"
        if translate:
            output += f"Translated: {segment['translated']}\n"
        output += "\n"
    return output


iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Checkbox(label="Enable Translation"),
        gr.Dropdown(
            choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"],
            label="Whisper Model Size",
            value="small",
        ),
    ],
    outputs="text",
    title="WhisperX Audio Transcription and Translation",
)

iface.launch()
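
# For reference, the report that transcribe_audio returns is shaped like the
# sketch below. The values and the "SPEAKER_00" label are illustrative only
# (that label follows the convention of pyannote-based diarization, which
# audio_processing is assumed to use); the surrounding text comes directly
# from the f-strings above.
#
#   Detected language changes:
#
#   Language: en
#   Time: 0.00s - 12.50s
#
#   Transcription with language detection and speaker diarization (using small model):
#
#   [0.00s - 4.20s] (en) SPEAKER_00:
#   Original: Hello and welcome.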