KarthickAdopleAI committed on
Commit
3de47d3
·
verified ·
1 Parent(s): c700ffd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +244 -74
app.py CHANGED
@@ -1,7 +1,9 @@
 
1
  from stable_whisper import modify_model,results_to_word_srt, results_to_sentence_srt
2
  import whisper
3
  import pysrt
4
  import re
 
5
  import os
6
  from copy import deepcopy
7
  from typing import List
@@ -12,29 +14,32 @@ from nltk.corpus import stopwords
12
  from nltk.tokenize import word_tokenize
13
  from nltk import FreqDist
14
  from nltk.metrics import jaccard_distance
15
-
 
 
 
 
 
 
 
16
  from moviepy.video.io.VideoFileClip import VideoFileClip
17
  from moviepy.video.VideoClip import ImageClip
18
  from datetime import datetime
 
19
  import gradio as gr
20
- import nltk
21
- nltk.download('stopwords')
22
- nltk.download('punkt')
23
 
24
  huggingfacehub_api_token = os.getenv("HF_TOKEN")
 
25
 
26
- class VideoQA:
27
  def __init__(self):
28
- # self.loader = UnstructuredPDFLoader("/content/Document_ Introduction to Python (1).pdf")
29
- # self.extracted_text=self.loader.load()
 
 
 
 
 
30
 
31
- # self.huggingfacehub_api_token = # Replace with your Hugging Face token
32
- self.repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
33
- self.llm = HuggingFaceHub(
34
- huggingfacehub_api_token=huggingfacehub_api_token,
35
- repo_id=self.repo_id,
36
- model_kwargs={"temperature": 0.2, "max_new_tokens": 800}
37
- )
38
 
39
  def load_model(self,model_selected):
40
  """
@@ -119,7 +124,6 @@ class VideoQA:
119
  result['srt'] = self.whisper_result_to_srt(result)
120
  return result
121
 
122
-
123
  def to_srt(self,lines: List[dict], strip=False) -> str:
124
  """
125
  lines: List[dict]
@@ -204,8 +208,6 @@ class VideoQA:
204
  srt = self.to_srt(segs, strip=strip)
205
  return srt
206
 
207
-
208
-
209
  def extract_timestamps_and_text(self,input_text):
210
  timestamp_pattern = re.compile(r'(\d{2}:\d{2}:\d{2}.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}.\d{3})\n(.+)')
211
 
@@ -223,8 +225,46 @@ class VideoQA:
223
 
224
  return data
225
 
 
 
 
 
 
226
 
227
- def generate_contract(self,text,question):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
 
230
  template = """you are the german language and universal language expert .your task is analyze the given text and user ask any question about given text answer to the user question.your returning answer must in user's language.otherwise reply i don't know.
@@ -241,15 +281,38 @@ class VideoQA:
241
  return result
242
 
243
 
244
- def preprocess_sentence(self,sentence):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  stop_words = set(stopwords.words('english'))
246
  words = word_tokenize(sentence.lower())
247
  filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
248
  return filtered_words
249
 
250
  def compute_similarity(self,sentence1, sentence2):
251
- words1 = self.preprocess_sentence(sentence1)
252
- words2 = self.preprocess_sentence(sentence2)
253
 
254
  freq_dist1 = FreqDist(words1)
255
  freq_dist2 = FreqDist(words2)
@@ -258,7 +321,7 @@ class VideoQA:
258
 
259
  return jaccard
260
 
261
- def find_most_similar(self,sentence_list, target_sentence):
262
  similarities = [self.compute_similarity(target_sentence, sentence) for sentence in sentence_list]
263
 
264
  # Find the index of the most similar sentence
@@ -268,14 +331,14 @@ class VideoQA:
268
  return sentence_list[most_similar_index]
269
 
270
 
271
- def start_end_timestamp(self,result,answer):
272
  appended_text = []
273
 
274
  for item in result:
275
  appended_text.append(item['text'])
276
 
277
  # Find the most similar sentence
278
- matched_sentence = self.find_most_similar(appended_text, answer)
279
  start_time=""
280
  end_time=""
281
  for entry in result:
@@ -285,7 +348,41 @@ class VideoQA:
285
  print(start_time+"\n"+end_time)
286
  return start_time,end_time
287
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
 
289
 
290
  def timestamp_to_seconds(self,timestamp):
291
  time_format = "%H:%M:%S,%f"
@@ -300,63 +397,136 @@ class VideoQA:
300
  # Use moviepy to cut both video and audio
301
  video_clip = VideoFileClip(input_file).subclip(start_time, end_time)
302
  video_clip.write_videofile(output_file, codec='libx264', audio_codec='aac', temp_audiofile='temp-audio.m4a', remove_temp=True)
303
-
304
- def sentence_timestamp(self,data):
305
- result_sentences = []
306
-
307
- current_sentence = ""
308
- current_start_timestamp = ""
309
-
310
- for entry in data:
311
- text = entry['text']
312
- start_timestamp = entry['start_timestamp']
313
- end_timestamp = entry['end_timestamp']
314
 
315
- # If the current sentence is empty, update start timestamp
316
- if not current_sentence:
317
- current_start_timestamp = start_timestamp
318
-
319
- # Concatenate sentences until a sentence ends with a full stop
320
- current_sentence += " " + text
321
- if text.endswith('.'):
322
- result_sentences.append({
323
- 'start_timestamp': current_start_timestamp,
324
- 'end_timestamp': end_timestamp,
325
- 'text': current_sentence.strip()
326
- })
327
- current_sentence = ""
328
- return result_sentences
329
-
330
- def main(self,input_video_path,question):
331
 
332
- subtitle = self.transcribe_video(input_video_path,'medium')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  text = subtitle['text']
334
- answer = self.generate_contract(text,question)
 
335
 
336
  subrip_text = self.results_to_sentence_srt(subtitle)
337
  result = self.extract_timestamps_and_text(subrip_text)
338
- sent = self.sentence_timestamp(result)
339
- start_time,end_time = self.start_end_timestamp(sent,answer)
340
  output_video_path = 'output_video.mp4'
341
 
342
- self.cut_video(input_video_path, output_video_path, start_time, end_time)
343
  return output_video_path
344
 
345
- def gradio_interface(self):
346
-
347
- with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
348
- gr.HTML("""<center><h1>Video Question Answering</h1></center>""")
349
- with gr.Row():
350
- video = gr.Video(elem_classes="videosize")
351
- with gr.Row():
352
- query = gr.Textbox(label="Query")
353
- with gr.Row():
354
- output_video = gr.Video(elem_classes="videosize")
355
-
356
- query.submit(self.main,[video,query],output_video)
357
- demo.launch(debug=True)
358
-
359
- if __name__=="__main__":
360
- video_qa = VideoQA()
361
- video_qa.gradio_interface()
362
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
  from stable_whisper import modify_model,results_to_word_srt, results_to_sentence_srt
3
  import whisper
4
  import pysrt
5
  import re
6
+ from pytube import YouTube
7
  import os
8
  from copy import deepcopy
9
  from typing import List
 
14
  from nltk.tokenize import word_tokenize
15
  from nltk import FreqDist
16
  from nltk.metrics import jaccard_distance
17
+ from nltk.corpus import stopwords
18
+ from nltk.tokenize import word_tokenize
19
+ from nltk.stem import PorterStemmer
20
+ from sklearn.feature_extraction.text import TfidfVectorizer
21
+ from sklearn.metrics.pairwise import cosine_similarity
22
+ import nltk
23
+ nltk.download('stopwords')
24
+ nltk.download('punkt')
25
  from moviepy.video.io.VideoFileClip import VideoFileClip
26
  from moviepy.video.VideoClip import ImageClip
27
  from datetime import datetime
28
+ import moviepy.editor as mpy
29
  import gradio as gr
 
 
 
30
 
31
  huggingfacehub_api_token = os.getenv("HF_TOKEN")
32
+ class VideoEditor():
33
 
 
34
  def __init__(self):
35
+ repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
36
+ self.llm = HuggingFaceHub(
37
+ huggingfacehub_api_token=huggingfacehub_api_token,
38
+ repo_id=repo_id,
39
+ model_kwargs={"temperature": 0.2,"max_new_tokens":1000}
40
+ )
41
+ # self.data_json = {'topics': []}
42
 
 
 
 
 
 
 
 
43
 
44
  def load_model(self,model_selected):
45
  """
 
124
  result['srt'] = self.whisper_result_to_srt(result)
125
  return result
126
 
 
127
  def to_srt(self,lines: List[dict], strip=False) -> str:
128
  """
129
  lines: List[dict]
 
208
  srt = self.to_srt(segs, strip=strip)
209
  return srt
210
 
 
 
211
  def extract_timestamps_and_text(self,input_text):
212
  timestamp_pattern = re.compile(r'(\d{2}:\d{2}:\d{2}.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}.\d{3})\n(.+)')
213
 
 
225
 
226
  return data
227
 
228
def sentence_timestamp(self,data,terminators=(".",)):
    """
    Merge consecutive subtitle fragments into sentence-level entries.

    data: iterable of dicts with the keys 'text', 'start_timestamp' and
        'end_timestamp'.
    terminators: suffixes that close a sentence; defaults to a full stop.

    Returns a list of dicts with the same three keys. Each entry spans from
    the start of its first fragment to the end of its last fragment, and its
    text is the fragments joined by single spaces.
    """
    result_sentences = []
    closers = tuple(terminators)

    pending_fragments = []
    current_start_timestamp = ""
    last_end_timestamp = ""

    for entry in data:
        # Strip first so trailing whitespace cannot hide the terminator.
        text = entry['text'].strip()
        if not text:
            # A blank fragment carries no words; skipping it keeps it from
            # pinning the start timestamp of the next sentence.
            continue

        # The first fragment of a sentence decides where the sentence starts.
        if not pending_fragments:
            current_start_timestamp = entry['start_timestamp']

        pending_fragments.append(text)
        last_end_timestamp = entry['end_timestamp']

        if text.endswith(closers):
            result_sentences.append({
                'start_timestamp': current_start_timestamp,
                'end_timestamp': last_end_timestamp,
                'text': " ".join(pending_fragments)
            })
            pending_fragments = []

    # Keep a trailing fragment that never reached a terminator instead of
    # silently dropping the end of the transcript.
    if pending_fragments:
        result_sentences.append({
            'start_timestamp': current_start_timestamp,
            'end_timestamp': last_end_timestamp,
            'text': " ".join(pending_fragments)
        })

    return result_sentences
253
+
254
def timestamp_text_to_list(self,result_sentences):
    """Return the 'text' value of every sentence entry, in the original order."""
    texts = []
    for sentence_entry in result_sentences:
        texts.append(sentence_entry['text'])
    return texts
258
+
259
def list_to_json(self,text_list):
    """
    Serialise the sentence list as a JSON object string.

    text_list: sequence of sentence strings.

    Returns a string of the form {"sentences": [...]}. json.dumps is used so
    the result is valid JSON (double-quoted, correctly escaped) rather than a
    Python dict repr.
    """
    # Local import keeps this method self-contained.
    import json

    # ensure_ascii=False keeps non-English transcript text readable.
    return json.dumps({"sentences": list(text_list)}, ensure_ascii=False)
265
+
266
+
267
+ def video_qa_generate_contract(self,text,question):
268
 
269
 
270
  template = """you are the german language and universal language expert .your task is analyze the given text and user ask any question about given text answer to the user question.your returning answer must in user's language.otherwise reply i don't know.
 
281
  return result
282
 
283
 
284
def topic_generate_contract(self,json_text,subrip):
    """
    Ask the LLM to list the topics in the transcript and the sentences for each.

    json_text: serialised sentence list, inserted as the prompt's `content`.
    subrip: transcription result; its 'text' entry is inserted as `paragraph`.

    Returns whatever the chain run returns for the filled-in prompt.
    """
    # NOTE(review): the template's line-leading whitespace is reproduced as it
    # appears in the reviewed diff view - confirm against the deployed file.
    template = """your first task is extract all topics discussed in the given content.

second task is analyze the given paragraph and extract answer for the first task's extracted topics.
don't genarate answer yourself just extract related answer from the given paragraph.

returing answer format:
Topic:Topic
Sentence:*Topic* Sentence

```content:{content}```
```paragraph:{paragraph}```
"""

    topic_prompt = PromptTemplate(input_variables=["content","paragraph"], template=template)
    chain_inputs = {"content":json_text,"paragraph":subrip['text']}
    return LLMChain(llm=self.llm, prompt=topic_prompt, verbose=True).run(chain_inputs)
305
+
306
+
307
def video_qa_preprocess_sentence(self,sentence):
    """Lower-case and tokenize `sentence`, keeping alphanumeric tokens that are not English stop words."""
    english_stop_words = set(stopwords.words('english'))
    kept_tokens = []
    for token in word_tokenize(sentence.lower()):
        if token.isalnum() and token not in english_stop_words:
            kept_tokens.append(token)
    return kept_tokens
312
 
313
  def compute_similarity(self,sentence1, sentence2):
314
+ words1 = self.video_qa_preprocess_sentence(sentence1)
315
+ words2 = self.video_qa_preprocess_sentence(sentence2)
316
 
317
  freq_dist1 = FreqDist(words1)
318
  freq_dist2 = FreqDist(words2)
 
321
 
322
  return jaccard
323
 
324
+ def video_qa_find_most_similar(self,sentence_list, target_sentence):
325
  similarities = [self.compute_similarity(target_sentence, sentence) for sentence in sentence_list]
326
 
327
  # Find the index of the most similar sentence
 
331
  return sentence_list[most_similar_index]
332
 
333
 
334
+ def video_qa_start_end_timestamp(self,result,answer):
335
  appended_text = []
336
 
337
  for item in result:
338
  appended_text.append(item['text'])
339
 
340
  # Find the most similar sentence
341
+ matched_sentence = self.video_qa_find_most_similar(appended_text, answer)
342
  start_time=""
343
  end_time=""
344
  for entry in result:
 
348
  print(start_time+"\n"+end_time)
349
  return start_time,end_time
350
 
351
# Normalise a sentence before TF-IDF comparison
def preprocess_sentence(self,sentence):
    """Return the Porter stems of the alphanumeric, non-stop-word tokens of `sentence`, joined by spaces."""
    stemmer = PorterStemmer()
    english_stop_words = set(stopwords.words('english'))

    stems = []
    for token in word_tokenize(sentence):
        lowered = token.lower()
        # Drop punctuation-like tokens and English stop words
        if token.isalnum() and lowered not in english_stop_words:
            stems.append(stemmer.stem(lowered))

    return ' '.join(stems)
361
+
362
# Function to find the entry of list1 that is most similar to a given sentence
def topic_find_most_similar(self,sentence, list1):
    """
    Return the element of `list1` with the highest TF-IDF cosine similarity to `sentence`.

    sentence: the query sentence.
    list1: non-empty sequence of candidate sentences.

    Both the query and every candidate go through preprocess_sentence, so
    stemmed query tokens are compared against stemmed candidate tokens. The
    original (unprocessed) candidate is returned. On ties the first candidate
    wins.

    Raises ValueError when `list1` is empty.
    """
    if not list1:
        raise ValueError("list1 must contain at least one candidate sentence")

    processed_sentence = self.preprocess_sentence(sentence)

    best_index = 0
    best_similarity = None
    for index, candidate_sentence in enumerate(list1):
        processed_candidate = self.preprocess_sentence(candidate_sentence)

        similarity = 0.0
        # A text with no tokens left cannot be vectorised; score it as 0.
        if processed_sentence and processed_candidate:
            try:
                similarity = self.calculate_cosine_similarity(processed_sentence, processed_candidate)
            except ValueError:
                # The vectoriser found no usable vocabulary in either text.
                similarity = 0.0

        # Strict comparison keeps the first maximum.
        if best_similarity is None or similarity > best_similarity:
            best_similarity = similarity
            best_index = index

    return list1[best_index]
375
+
376
# Function to calculate cosine similarity between two sentences
def calculate_cosine_similarity(self,sentence1, sentence2):
    """
    Return the cosine similarity of the TF-IDF vectors of two sentences.

    The vectoriser is fitted on just these two texts. When neither text
    yields a token the vectoriser can use, the similarity is reported as 0.0
    instead of propagating the vectoriser's ValueError.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # fit_transform raises when the resulting vocabulary is empty.
        return 0.0

    # cosine_similarity returns a 1x1 matrix for two single-row inputs.
    return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
386
 
387
  def timestamp_to_seconds(self,timestamp):
388
  time_format = "%H:%M:%S,%f"
 
397
  # Use moviepy to cut both video and audio
398
  video_clip = VideoFileClip(input_file).subclip(start_time, end_time)
399
  video_clip.write_videofile(output_file, codec='libx264', audio_codec='aac', temp_audiofile='temp-audio.m4a', remove_temp=True)
 
 
 
 
 
 
 
 
 
 
 
400
 
401
def start_end_timestamp(self,result,matched_sentence):
    """
    Look up the timestamps of the entry whose text contains `matched_sentence`.

    Returns (start_timestamp, end_timestamp) of the last entry in `result`
    whose 'text' contains `matched_sentence`, or ("", "") when none does.
    """
    containing = [entry for entry in result if matched_sentence in entry['text']]
    if not containing:
        return "", ""
    # Later matches override earlier ones.
    final_match = containing[-1]
    return final_match['start_timestamp'], final_match['end_timestamp']
 
 
 
 
 
 
 
410
 
411
def video_write_funcion(self,vid,answer,text_list,result_sentences):
    """
    Write one video file per topic found in the LLM answer.

    vid: path of the source video.
    answer: LLM output made of blank-line separated blocks; the first line of
        a block is "Topic:<name>" and the following lines are
        "Sentence:<text>".
    text_list: transcript sentences used as matching candidates.
    result_sentences: sentence entries with 'text', 'start_timestamp' and
        'end_timestamp', used to look up the clip boundaries.

    For every topic, each extracted sentence is mapped to its most similar
    transcript sentence, the matching clips are concatenated, and the result
    is written to "<topic>.mp4". Topics without any usable clip are skipped.
    """
    def after_label(line):
        # Keep everything after the first colon so sentences that contain a
        # colon themselves are not truncated; a line without a colon is kept whole.
        _, separator, rest = line.partition(":")
        return (rest if separator else line).strip()

    video = mpy.VideoFileClip(vid)
    try:
        if not text_list:
            # Nothing to match against, so no clip can be located.
            return

        topics = {}
        for topic_block in answer.strip().split("\n\n"):
            lines = [line for line in topic_block.split("\n") if line.strip()]
            if not lines:
                continue

            topic = after_label(lines[0])
            if not topic:
                continue
            sentence = " ".join(after_label(line) for line in lines[1:])

            # Map every extracted sentence to its closest transcript sentence,
            # keeping each transcript sentence once and in first-seen order.
            unique_similar_sentences = []
            for sentence2 in sentence.split("."):
                if not sentence2.strip():
                    # The empty piece after a final full stop has no content to match.
                    continue
                most_similar_sentence = self.topic_find_most_similar(sentence2, text_list)
                if most_similar_sentence not in unique_similar_sentences:
                    unique_similar_sentences.append(most_similar_sentence)

            clips = []
            for matched_sentence in unique_similar_sentences:
                start_time,end_time = self.start_end_timestamp(result_sentences,matched_sentence)
                if not start_time or not end_time:
                    # No timestamps were found for this sentence.
                    continue
                clips.append(video.subclip(start_time, end_time))

            if clips:
                topics[topic] = mpy.concatenate_videoclips(clips)

        # Write while the source clip is still open: the sub-clips read from it.
        for topic, clip in topics.items():
            # Topic names come from the LLM; replace characters that are not
            # valid in file names.
            file_stem = re.sub(r'[\\/:*?"<>|]+', "_", topic).strip() or "topic"
            clip.write_videofile(f"{file_stem}.mp4")
    finally:
        video.close()
444
+
445
+
446
def video_qa_main(self,input_path,video,question):
    """
    Answer `question` about a video and return the path of the matching clip.

    input_path: YouTube link; takes precedence over `video` when given.
    video: path of an uploaded video file.
    question: the user's question about the video content.

    Returns 'output_video.mp4', the cut segment that best matches the answer.

    Raises ValueError when neither a link nor a video is supplied, or when
    the link could not be downloaded.
    """
    if input_path:
        input_path = self.Download(input_path)
        if not input_path:
            raise ValueError("Could not download a video from the given YouTube link.")
    elif video:
        input_path = video
    else:
        # Without this guard `subtitle` below would be unbound.
        raise ValueError("Provide a YouTube link or upload a video.")

    subtitle = self.transcribe_video(input_path,'medium')
    print(subtitle['text'])
    text = subtitle['text']
    answer = self.video_qa_generate_contract(text,question)
    print("video_qa_generate_contract")

    subrip_text = self.results_to_sentence_srt(subtitle)
    result = self.extract_timestamps_and_text(subrip_text)
    sent = self.sentence_timestamp(result)
    start_time,end_time = self.video_qa_start_end_timestamp(sent,answer)
    output_video_path = 'output_video.mp4'

    self.cut_video(input_path, output_video_path, start_time, end_time)
    return output_video_path
466
 
467
def Download(self,link):
    """
    Download the highest-resolution stream of a YouTube video.

    link: URL of the YouTube video.

    Returns the path of the downloaded file, or None when the download
    failed (the error is printed). Errors raised while looking up the video
    and its streams propagate to the caller.
    """
    youtubeObject = YouTube(link)
    youtubeObject = youtubeObject.streams.get_highest_resolution()
    try:
        file_name = youtubeObject.download()
    except Exception as error:
        # Report what went wrong instead of hiding it behind a bare except.
        print(f"An error has occurred: {error}")
        return None
    # Reached only when the download actually succeeded.
    print("Download is completed successfully")
    return file_name
476
+
477
def topic_main(self,input_path,video):
    """
    Split a video into one file per topic discussed in it.

    input_path: YouTube link; takes precedence over `video` when given.
    video: path of an uploaded video file.

    Returns a status message once the topic videos have been written.

    Raises ValueError when neither a link nor a video is supplied, or when
    the link could not be downloaded.
    """
    if input_path:
        input_path = self.Download(input_path)
        if not input_path:
            raise ValueError("Could not download a video from the given YouTube link.")
    elif video:
        input_path = video
    else:
        # Without this guard `subrip` below would be unbound.
        raise ValueError("Provide a YouTube link or upload a video.")

    subrip = self.transcribe_video(input_path,'medium')
    print(subrip['text'])
    text = self.results_to_sentence_srt(subrip)
    print("results_to_sentence_srt")
    data = self.extract_timestamps_and_text(text)
    print("extract_timestamps_and_text")
    result_sentences = self.sentence_timestamp(data)
    text_list = self.timestamp_text_to_list(result_sentences)
    json_text = self.list_to_json(text_list)
    print("list_to_json")
    answer = self.topic_generate_contract(json_text,subrip)
    print("topic_generate_contract")
    self.video_write_funcion(input_path,answer,text_list,result_sentences)
    return "Topic Video Writted Successfully."
501
+
502
+
503
with gr.Blocks() as demo:
    video_editor = VideoEditor()
    gr.HTML("""<center><h1>Video Question Answering & Topic Extracter</h1></center>""")

    # Tab 1: ask a question about a video and get the matching clip back.
    with gr.Tab("Video QA"):
        with gr.Row():
            youtube_link = gr.Textbox(label= "Youtube Link",placeholder="https://www.youtube.com/watch?v=")
        with gr.Row():
            video = gr.Video(sources="upload",height=200,width=300)
        with gr.Row():
            query = gr.Textbox(label="Query")

        with gr.Row():
            output_video = gr.Video(height=200,width=300)

        # Pressing Enter in the query box runs the QA pipeline.
        query.submit(
            fn=video_editor.video_qa_main,
            inputs=[youtube_link,video,query],
            outputs=output_video,
        )

    # Tab 2: split a video into one file per topic.
    with gr.Tab("Topic Extract"):
        with gr.Row():
            yt_link = gr.Textbox(label= "Youtube Link",placeholder="https://www.youtube.com/watch?v=")
        with gr.Row():
            video = gr.Video(height=200,width=300)
        with gr.Row():
            submit_btn = gr.Button(value="Submit")
        with gr.Row():
            textbox = gr.Textbox(label = "Status")

        submit_btn.click(
            fn=video_editor.topic_main,
            inputs=[yt_link,video],
            outputs=textbox,
        )

# NOTE(review): the diff view does not show whether launch() sat inside the
# Blocks context - confirm against the deployed file.
demo.launch()