KarthickAdopleAI committed
Commit e21cd35
1 Parent(s): 3647674

Update app.py

Files changed (1)
  1. app.py +164 -125
app.py CHANGED
@@ -4,7 +4,6 @@ from huggingface_hub import InferenceClient
 import os
 import ffmpeg
 from typing import List
-from moviepy.editor import VideoFileClip
 import nltk
 from gtts import gTTS
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -17,8 +16,9 @@ import os
 from pydub import AudioSegment
 import speech_recognition as sr
 import torchaudio
-from pydub.silence import split_on_silence
 from speechbrain.inference.classifiers import EncoderClassifier
+from pydub.silence import split_on_silence
+from moviepy.editor import VideoFileClip
 nltk.download('punkt')
 nltk.download('stopwords')

@@ -37,8 +37,9 @@ class VideoAnalytics:
         """
         # Initialize AzureOpenAI client
         self.client = AzureOpenAI()
-
-        self.mistral_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+
+        hf_token =os.getenv("HF_TOKEN")
+        self.mistral_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1",token=hf_token)

         # Initialize transcribed text variable
         self.transcribed_text = ""
@@ -74,18 +75,52 @@ class VideoAnalytics:
         try:
             # Load the MP3 file
             audio = AudioSegment.from_mp3(mp3_file)
-
+
             # Export the audio to WAV format
             audio.export(wav_file, format="wav")
-
+
             logging.info(f"MP3 file '{mp3_file}' converted to WAV successfully: {wav_file}")
-
+
             return wav_file
         except Exception as e:
             # Log the exception and raise it further
             logging.error(f"Error occurred while converting MP3 to WAV: {e}")
             raise e

+    def split_audio(self, input_file: str) -> list:
+        """
+        Split an audio file into segments of a specified length.
+
+        Args:
+            input_file (str): Path to the input audio file.
+
+        Returns:
+            list: List of audio segments.
+        """
+        try:
+            # Load the audio file
+            audio = AudioSegment.from_file(input_file)
+
+            # Define segment length in milliseconds (5 minutes = 300,000 milliseconds)
+            segment_length = 60000
+
+            # Split the audio into segments
+            segments = []
+            for i, start_time in enumerate(range(0, len(audio), segment_length)):
+                # Calculate end time for current segment
+                end_time = start_time + segment_length if start_time + segment_length < len(audio) else len(audio)
+
+                # Extract segment
+                segment = audio[start_time:end_time]
+
+                # Append segment to list
+                segments.append(segment)
+
+            return segments
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return []
+
     # Function to recognize speech in the audio file
     def transcribe_audio(self,path: str,lang: str):
         """Transcribe speech from an audio file."""
@@ -108,51 +143,30 @@ class VideoAnalytics:
             sound = AudioSegment.from_file(path)
             chunks = split_on_silence(sound, min_silence_len=500, silence_thresh=sound.dBFS-14, keep_silence=500)
             folder_name = "audio-chunks"
-
+
             if not os.path.isdir(folder_name):
                 os.mkdir(folder_name)
-
+
             whole_text = ""
-
+
             for i, audio_chunk in enumerate(chunks, start=1):
                 chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
                 audio_chunk.export(chunk_filename, format="wav")
-
+
                 text = self.transcribe_audio(chunk_filename,lang)
-
+
                 if text:
                     text = f"{text.capitalize()}. "
                     logging.info(f"Transcribed {chunk_filename}: {text}")
                     whole_text += text
                 else:
                     logging.warning(f"No speech recognized in {chunk_filename}")
-
+
             return whole_text
         except Exception as e:
             logging.error(f"Error processing audio: {e}")
             return ""
-
-    def split_audio(self,input_file):
-        # Load the audio file
-        audio = AudioSegment.from_file(input_file)
-
-        # Define segment length in milliseconds (5 minutes = 300,000 milliseconds)
-        segment_length = 60000
-
-        # Split the audio into segments
-        segments = []
-        for i, start_time in enumerate(range(0, len(audio), segment_length)):
-            # Calculate end time for current segment
-            end_time = start_time + segment_length if start_time + segment_length < len(audio) else len(audio)
-
-            # Extract segment
-            segment = audio[start_time:end_time]
-
-            # Append segment to list
-            segments.append(segment)

-        return segments
-
     def transcribe_video(self, vid: str) -> str:
         """
         Transcribe the audio of the video.
@@ -191,68 +205,133 @@ class VideoAnalytics:
             logging.error(f"Error transcribing video: {e}")
             return ""

-    def generate_video_summary(self) -> str:
+    def generate_video_summary(self,model) -> str:
         """
-        Generate a summary of the transcribed video.
+        Generate a summary of the transcribe_video.
         Returns:
             str: Generated summary.
         """
         try:
-            # Define a conversation between system and user
-            conversation = [
-                {"role": "system", "content": "You are a Summarizer"},
-                {"role": "user", "content": f"""summarize the following text delimited by triple backticks.Output must in english.
+            if model == "OpenAI":
+                # Define a conversation between system and user
+                conversation = [
+                    {"role": "system", "content": "You are a Summarizer"},
+                    {"role": "user", "content": f"""summarize the following text delimited by triple backticks.Output must in english.give me a detailed summary.extractive summary working br like extract sentences from given text to return as summary,abstractive summary working be like summary of what about the given text.don't make bullet points write like a passage.
+                    In two format of Outputs given below:
+                    Abstractive Summary:
+                    Extractive Summary:
+                    ```{self.english_text}```
+                    """}
+                ]
+                # Generate completion using ChatGPT model
+                response = self.client.chat.completions.create(
+                    model="ChatGPT",
+                    messages=conversation,
+                    temperature=0,
+                    max_tokens=1000
+                )
+                # Get the generated summary message
+                message = response.choices[0].message.content
+                return message
+
+            elif model == "Mixtral":
+                task = "summary"
+                # Generate answer using Mixtral model
+                prompt = f"""<s>[INST] summarize the following text delimited by triple backticks.Output must in english.give me a detailed summary.extractive summary working br like extract sentences from given text to return as summary,abstractive summary working be like summary of what about the given text.don't make bullet points write like a passage.
             In two format of Outputs given below:
             Abstractive Summary:
             Extractive Summary:
-            ```{self.english_text}```
-            """}
-            ]
-            # Generate completion using ChatGPT model
-            response = self.client.chat.completions.create(
-                model="ChatGPT",
-                messages=conversation,
-                temperature=0,
-                max_tokens=1000
-            )
-            # Get the generated summary message
-            message = response.choices[0].message.content
-            return message
+            ```data:{self.english_text}```[/INST]"""
+                result = self.generate(prompt)
+                return result
+
         except Exception as e:
             logging.error(f"Error generating video summary: {e}")
             return ""


-    def generate_topics(self) -> str:
+    def generate_topics(self,model) -> str:
         """
-        Generate topics from the transcribed video.
+        Generate topics from the transcribe_video.
         Returns:
             str: Generated topics.
         """
         try:
-            # Define a conversation between system and user
-            conversation = [
-                {"role": "system", "content": "You are a Topic Generator"},
-                {"role": "user", "content": f"""generate single Topics from the following text don't make sentence for topic generation,delimited by triple backticks.Output must in english.
+            if model == "OpenAI":
+                # Define a conversation between system and user
+                conversation = [
+                    {"role": "system", "content": "You are a Topic Generator"},
+                    {"role": "user", "content": f"""generate single Topics from the following text don't make sentence for topic generation,delimited by triple backticks.Output must in english.
+                    list out the topics:
+                    Topics:
+                    ```{self.english_text}```
+                    """}
+                ]
+                # Generate completion using ChatGPT model
+                response = self.client.chat.completions.create(
+                    model="ChatGPT",
+                    messages=conversation,
+                    temperature=0,
+                    max_tokens=1000
+                )
+                # Get the generated topics message
+                message = response.choices[0].message.content
+                return message
+            elif model == "Mixtral":
+                task = "topics"
+                # Generate answer using Mixtral model
+                prompt = f"""<s>[INST]generate single Topics from the following text don't make sentence for topic generation,delimited by triple backticks.Output must in english.
             list out the topics:
             Topics:
-            ```{self.english_text}```
-            """}
-            ]
-            # Generate completion using ChatGPT model
-            response = self.client.chat.completions.create(
-                model="ChatGPT",
-                messages=conversation,
-                temperature=0,
-                max_tokens=1000
-            )
-            # Get the generated topics message
-            message = response.choices[0].message.content
-            return message
+            ```data:{self.english_text}```[/INST]"""
+                result = self.generate(prompt)
+                return result
+
         except Exception as e:
             logging.error(f"Error generating topics: {e}")
             return ""

+    def extract_video_important_sentence(self,model) -> str:
+        """
+        Extract important sentences from the pdf.
+        Returns:
+            str: Extracted important sentences.
+        """
+        try:
+            if model == "OpenAI":
+                # Define a conversation between system and user
+                conversation = [
+                    {"role": "system", "content": "You are a Sentence Extracter"},
+                    {"role": "user", "content": f""" Extract Most important of the sentences from text.the text is given in triple backtics.
+                    listout the sentences:
+                    ```{self.english_text}```
+                    """}
+                ]
+                # Generate completion using ChatGPT model
+                response = self.client.chat.completions.create(
+                    model="ChatGPT",
+                    messages=conversation,
+                    temperature=0,
+                    max_tokens=1000
+                )
+                # Get the generated topics message
+                message = response.choices[0].message.content
+                return message
+            elif model == "Mixtral":
+                task = "topics"
+                # Generate answer using Mixtral model
+                prompt = f"""<s>[INST] Extract Most important of the sentences from text.the text is given in triple backtics.
+                listout the sentences:
+                ```{self.english_text}```[/INST]"""
+                result = self.generate(prompt)
+                return result
+
+        except Exception as e:
+            logging.error(f"Error Extracting Important Sentence: {e}")
+            return ""
+
+
+
     def translation(self) -> str:
         """
         translation from the transcribed video.
@@ -299,7 +378,7 @@ class VideoAnalytics:
         return prompt+prompt1


-    def generate(self, prompt: str, transcribed_text: str, temperature=0.9, max_new_tokens=5000, top_p=0.95,
+    def generate(self, task: str,temperature=0.9, max_new_tokens=5000, top_p=0.95,
                  repetition_penalty=1.0) -> str:
         """
         Generates text based on the prompt and transcribed text.
@@ -328,11 +407,8 @@ class VideoAnalytics:
             seed=42,
         )

-        # Format the prompt
-        formatted_prompt = self.format_prompt(prompt,transcribed_text)
-
         # Generate text using the mistral client
-        stream = self.mistral_client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+        stream = self.mistral_client.text_generation(task, **generate_kwargs, stream=True, details=True, return_full_text=False)
         output = ""
         # Concatenate generated text
         for response in stream:
@@ -354,7 +430,7 @@ class VideoAnalytics:
         try:
             if model == "OpenAI":
                 template = """you are the universal language expert .your task is analyze the given text and user ask any question about given text answer to the user question.otherwise reply i don't know.
-                extracted_text:{text}
+                english_text:{text}
                 user_question:{question}"""

                 prompt = PromptTemplate(template=template, input_variables=["text","question"])
@@ -372,46 +448,6 @@ class VideoAnalytics:
             logging.error(f"Error in video question answering: {e}")
             return "An error occurred during video question answering."

-    def extract_video_important_sentence(self) -> str:
-        """
-        Extract important sentences from the transcribed video.
-        Returns:
-            str: Extracted important sentences.
-        """
-        try:
-
-            # Tokenize the sentences
-            sentences = nltk.sent_tokenize(self.english_text)
-
-            # Initialize TF-IDF vectorizer
-            tfidf_vectorizer = TfidfVectorizer()
-
-            # Fit the vectorizer on the summary sentences
-            tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
-
-            # Calculate sentence scores based on TF-IDF values
-            sentence_scores = tfidf_matrix.sum(axis=1)
-
-            # Create a list of (score, sentence) tuples
-            sentence_rankings = [(score, sentence) for score, sentence in zip(sentence_scores, sentences)]
-
-            # Sort sentences by score in descending order
-            sentence_rankings.sort(reverse=True)
-
-            # Set a threshold for selecting sentences
-            threshold = 2.5  # Adjust as needed
-
-            # Select sentences with scores above the threshold
-            selected_sentences = [sentence for score, sentence in sentence_rankings if score >= threshold]
-
-            # Join selected sentences to form the summary
-            summary = '\n\n'.join(selected_sentences)
-
-            return summary
-
-        except Exception as e:
-            logging.error(f"Error extracting important sentences: {e}")
-            return ""

     def write_text_files(self, text: str, filename: str) -> None:
         """
@@ -481,7 +517,7 @@ class VideoAnalytics:
             logging.error(f"Error occurred while saving audio: {e}")
             raise e

-    def main(self, video: str = None, input_path: str = None) -> tuple:
+    def main(self, video: str = None, input_path: str = None,model: str = None) -> tuple:
         """
         Perform video analytics.
         Args:
@@ -511,13 +547,13 @@ class VideoAnalytics:
             else:
                 return "Video Duration Above 10 Minutes,Try Below 10 Minutes Video","","",None,None,None
             # Generate summary, important sentences, and topics
-            summary = self.generate_video_summary()
+            summary = self.generate_video_summary(model)
             self.write_text_files(summary,"Summary")
             summary_voice = self.save_audio_with_gtts(summary,"summary.mp3")
-            important_sentences = self.extract_video_important_sentence()
+            important_sentences = self.extract_video_important_sentence(model)
             self.write_text_files(important_sentences,"Important_Sentence")
             important_sentences_voice = self.save_audio_with_gtts(important_sentences,"important_sentences.mp3")
-            topics = self.generate_topics()
+            topics = self.generate_topics(model)
             self.write_text_files(topics,"Topics")
             topics_voice = self.save_audio_with_gtts(topics,"topics.mp3")

@@ -533,7 +569,10 @@ class VideoAnalytics:
         with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
             gr.HTML("""<center><h1>Video Analytics</h1></center>""")
             with gr.Row():
-                yt_link = gr.Textbox(label= "Youtube Link",placeholder="https://www.youtube.com/watch?v=")
+                with gr.Column(scale=0.70):
+                    yt_link = gr.Textbox(label= "Youtube Link",placeholder="https://www.youtube.com/watch?v=")
+                with gr.Column(scale=0.30):
+                    model_selection = gr.Dropdown(["OpenAI", "Mixtral"],label="Model",value="model")
             with gr.Row():
                 video = gr.Video(sources="upload",height=200,width=300)
             with gr.Row():
@@ -567,9 +606,9 @@ class VideoAnalytics:
                 model = gr.Dropdown(["OpenAI", "Mixtral"],show_label=False,value="model")
             with gr.Row():
                 result = gr.Textbox(label='Answer',lines=10)
-        submit_btn.click(self.main,[video,yt_link],[summary,Important_Sentences,Topics,summary_audio,important_sentence_audio,topics_audio])
+        submit_btn.click(self.main,[video,yt_link,model_selection],[summary,Important_Sentences,Topics,summary_audio,important_sentence_audio,topics_audio])
         question.submit(self.video_qa,[question,model],result)
-        demo.launch()
+        demo.launch(debug = True)

 if __name__ == "__main__":
     video_analytics = VideoAnalytics()
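
For reference, a minimal sketch of how the updated entry point could be driven after this commit; the token value, file name, and model choice below are illustrative assumptions, not part of the commit:

    # Sketch: assumes app.py is importable and a valid Hugging Face token is available.
    import os
    os.environ["HF_TOKEN"] = "hf_..."  # assumption: __init__ reads this via os.getenv("HF_TOKEN")

    from app import VideoAnalytics

    analytics = VideoAnalytics()
    # main() now takes the model choice ("OpenAI" or "Mixtral") as a third argument
    # and returns a tuple of (summary, important sentences, topics, audio outputs...).
    summary, sentences, topics, *audio = analytics.main(video="sample.mp4", input_path=None, model="Mixtral")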