import os
import streamlit as st
from pytube import YouTube, exceptions as pytube_exceptions
from urllib.error import URLError
from tempfile import NamedTemporaryFile
import speech_recognition as sr
import subprocess
import pydub
import noisereduce as nr


# Function to reduce noise

def reduce_noise(file_path):
    """
    Applies noise reduction to a WAV file and writes the result next to it.
    :param file_path: Path to the input WAV file.
    :return: Path of the processed file (``<name>_processed.wav``).
    """
    # Local import: numpy is needed here but is not imported at module level.
    import numpy as np

    audio_data = pydub.AudioSegment.from_wav(file_path)
    channel_count = audio_data.channels

    # pydub returns the samples as a flat array; multi-channel audio is interleaved.
    samples = np.array(audio_data.get_array_of_samples())
    sample_dtype = samples.dtype

    if channel_count > 1:
        # noisereduce documents multi-channel input as (channels, frames).
        samples = samples.reshape((-1, channel_count)).T

    cleaned = nr.reduce_noise(y=samples, sr=audio_data.frame_rate)

    # Cast back to the source sample type so the raw bytes agree with
    # sample_width (a no-op if the library already preserved the dtype).
    cleaned = np.asarray(cleaned).astype(sample_dtype)
    if channel_count > 1:
        # Back to (frames, channels) so tobytes() emits interleaved samples.
        cleaned = cleaned.T

    # Only rewrite the extension; str.replace would also touch '.wav' elsewhere
    # in the path and would return the input path unchanged if it was absent.
    root, _ = os.path.splitext(file_path)
    processed_file_path = f"{root}_processed.wav"

    processed_audio = pydub.AudioSegment(
        cleaned.tobytes(),
        frame_rate=audio_data.frame_rate,
        sample_width=audio_data.sample_width,
        channels=channel_count,
    )
    processed_audio.export(processed_file_path, format="wav")
    return processed_file_path

# Function to split audio

def split_audio(file_path, chunk_length_ms=60000):  # 60 seconds
    """
    Splits the audio file into smaller chunks.

    The audio is first cut at pauses (silence of at least 500 ms that is 14 dB
    below the file's average loudness). Any resulting piece longer than
    ``chunk_length_ms`` is then sliced further so no chunk exceeds that length.

    :param file_path: Path to the WAV audio file.
    :param chunk_length_ms: Maximum length of each chunk in milliseconds.
        ``None`` or a non-positive value disables the length cap.
    :return: List of audio chunk file paths (``<file_path>_chunk<i>.wav``).
    """
    # Import the submodule explicitly instead of relying on it having been
    # loaded as a side effect of ``import pydub``.
    from pydub.silence import split_on_silence

    audio = pydub.AudioSegment.from_wav(file_path)
    segments = split_on_silence(
        audio,
        min_silence_len=500,
        silence_thresh=audio.dBFS - 14,
        keep_silence=500,
    )

    cap_enabled = chunk_length_ms is not None and chunk_length_ms > 0

    chunk_files = []
    for segment in segments:
        if cap_enabled and len(segment) > chunk_length_ms:
            # AudioSegment lengths and slice indices are in milliseconds.
            pieces = [
                segment[start:start + chunk_length_ms]
                for start in range(0, len(segment), chunk_length_ms)
            ]
        else:
            pieces = [segment]

        for piece in pieces:
            chunk_name = f"{file_path}_chunk{len(chunk_files)}.wav"
            piece.export(chunk_name, format="wav")
            chunk_files.append(chunk_name)

    return chunk_files


# Function to download audio from a YouTube video

def download_audio(url):
    """
    Downloads the first audio-only stream of a YouTube video to a temporary file.
    :param url: URL of the YouTube video.
    :return: Path of the downloaded file, or None if the video has no audio
        stream or pytube reported an error (the error is shown via st.error).
    """
    temp_path = None
    downloaded = False
    try:
        yt = YouTube(url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # An empty stream query yields None rather than raising.
            st.error("Error downloading video: no audio stream is available.")
            return None

        # Reserve a unique path, then close the handle before pytube writes to
        # it; writing to a file that is still open fails on some platforms.
        with NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name

        audio_stream.download(filename=temp_path)
        downloaded = True
        return temp_path
    except pytube_exceptions.PytubeError as e:
        st.error(f"Error downloading video: {e}")
        return None
    finally:
        # The temp file was created with delete=False, so remove it ourselves
        # whenever the download did not complete (including on other errors).
        if temp_path is not None and not downloaded and os.path.exists(temp_path):
            os.remove(temp_path)

# Function to convert an audio file to WAV format
def convert_to_wav(original_file):
    """
    Converts an audio file to 16 kHz mono WAV using ffmpeg.
    :param original_file: Path to the source audio file.
    :return: Path of the WAV file (source path with its extension replaced
        by ``.wav``).
    :raises RuntimeError: If ffmpeg exits with a non-zero status.
    """
    target_format = "wav"

    # Replace only the final extension. Splitting on the first '.' would
    # truncate paths with dotted directories or multi-dot file names.
    root, _ = os.path.splitext(original_file)
    output_file = f"{root}.{target_format}"

    if output_file == original_file:
        # ffmpeg cannot write onto its own input, so there is nothing to run;
        # hand the existing WAV back unchanged.
        return output_file

    # -y overwrites a stale output file; stdin is detached so ffmpeg can never
    # block waiting for an interactive answer.
    command = ['ffmpeg', '-y', '-i', original_file, '-ar', '16000', '-ac', '1', output_file]
    result = subprocess.run(
        command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    if result.returncode != 0:
        stderr_lines = result.stderr.decode(errors="replace").strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else "no error output"
        raise RuntimeError(
            f"ffmpeg failed to convert {original_file} "
            f"(exit code {result.returncode}): {reason}"
        )

    return output_file

# Function to transcribe audio using SpeechRecognition

def transcribe_audio(file_path):
    """
    Transcribes the audio file using SpeechRecognition.

    Files larger than 10 MB are split into chunks first; each chunk is sent to
    the Google recognizer and the pieces are joined with spaces. Chunks that
    cannot be understood or that fail with a request error are represented by
    ``[Unintelligible]`` / ``[Error: ...]`` markers in the output.

    :param file_path: Path to the audio file. The file itself is left in
        place; only chunk files created here are deleted.
    :return: Transcribed text.
    :raises FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    recognizer = sr.Recognizer()

    size_mb = os.path.getsize(file_path) / (1024 * 1024)
    if size_mb > 10:  # If file size is greater than 10MB
        chunk_files = split_audio(file_path)
    else:
        chunk_files = [file_path]

    pieces = []
    try:
        for chunk_file in chunk_files:
            with sr.AudioFile(chunk_file) as source:
                audio_data = recognizer.record(source)
            try:
                pieces.append(recognizer.recognize_google(audio_data))
            except sr.UnknownValueError:
                pieces.append("[Unintelligible]")
            except sr.RequestError as e:
                pieces.append(f"[Error: {e}]")
    finally:
        # Remove the temporary chunks even if recognition raised. The input
        # file is never deleted here: it belongs to the caller, and removing
        # it would make the caller's own cleanup fail.
        for chunk_file in chunk_files:
            if chunk_file != file_path and os.path.exists(chunk_file):
                os.remove(chunk_file)

    return " ".join(pieces).strip()


# Function to handle the overall transcription process
def transcribe_youtube_video(url):
    """
    Downloads a YouTube video's audio, converts it to WAV and transcribes it.
    :param url: URL of the YouTube video.
    :return: The transcription, or a human-readable error message if the
        download, conversion or transcription failed.
    """
    audio_path = None
    wav_audio_path = None
    try:
        audio_path = download_audio(url)
        if not audio_path:
            return "Failed to download audio from YouTube."

        wav_audio_path = convert_to_wav(audio_path)
        return transcribe_audio(wav_audio_path)
    except URLError:
        return "Error in network connection. Please check your connection and try again."
    except Exception as e:
        return f"An unexpected error occurred: {e}"
    finally:
        # Always remove the temporary files, on success and on failure alike.
        # Cleanup is best-effort: a file that is already gone (or cannot be
        # removed) must not replace the result being returned with an error.
        for temp_path in (audio_path, wav_audio_path):
            if not temp_path:
                continue
            try:
                os.remove(temp_path)
            except OSError:
                pass

# Main function for the Streamlit app

def main():
    """
    Renders the Streamlit page: a URL input and a "Transcribe" button that
    runs the transcription and shows the result in a text area.
    """
    st.title("YouTube Video Transcriber")
    video_url = st.text_input("Enter the URL of the YouTube video:")

    # Nothing to do until the button is pressed.
    if not st.button("Transcribe"):
        return

    if not video_url:
        st.error("Please enter a valid YouTube URL.")
        return

    with st.spinner('Transcribing...'):
        result_text = transcribe_youtube_video(video_url)
        st.text_area("Transcription:", result_text, height=300)

# Start the app only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()