das1mtb56 committed on
Commit
2224634
·
verified ·
1 Parent(s): 31f3a77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -78
app.py CHANGED
@@ -1,101 +1,78 @@
1
- import gradio as gr
2
- import whisper
3
  import os
4
- import re
5
  import yt_dlp
6
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
 
 
7
 
8
- # Load models
9
- whisper_model = whisper.load_model("base")
10
- multilingual_model = "csebuetnlp/mT5_multilingual_XLSum"
11
- tokenizer = AutoTokenizer.from_pretrained(multilingual_model)
12
- summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(multilingual_model)
13
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
14
 
15
- SUPPORTED_LANGUAGES = {
16
- "bn": "Bengali", "en": "English", "gu": "Gujarati", "hi": "Hindi",
17
- "kn": "Kannada", "ml": "Malayalam", "mr": "Marathi", "ta": "Tamil",
18
- "te": "Telugu", "ur": "Urdu"
19
- }
 
20
 
21
  def download_audio(youtube_url):
 
22
  ydl_opts = {
23
  'format': 'bestaudio/best',
24
- 'outtmpl': 'audio.%(ext)s',
25
- 'postprocessors': [{
26
- 'key': 'FFmpegExtractAudio',
27
- 'preferredcodec': 'mp3',
28
- }],
29
- 'cookiefile': 'cookies.txt' # <— Uses your uploaded cookie file
30
  }
31
-
32
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
33
- info_dict = ydl.extract_info(youtube_url, download=True)
34
- filename = ydl.prepare_filename(info_dict).replace(".webm", ".mp3").replace(".m4a", ".mp3")
35
- return filename
36
-
37
- def extract_thumbnail(youtube_url):
38
- match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", youtube_url)
39
- if match:
40
- video_id = match.group(1)
41
- return f"https://img.youtube.com/vi/{video_id}/0.jpg"
42
- return ""
43
 
44
- def summarize_text(text):
45
- input_text = f"summarize: {text}"
46
- inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
47
- summary_ids = summarizer_model.generate(inputs, max_length=150, min_length=30, num_beams=4)
48
- return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
49
 
50
- def transcribe_and_summarize(youtube_url, translate_to_english):
51
- try:
52
- audio_file = download_audio(youtube_url)
53
- result = whisper_model.transcribe(audio_file)
54
- transcript = result["text"]
55
- lang_code = result["language"]
56
 
57
- thumbnail_url = extract_thumbnail(youtube_url)
58
 
59
- if lang_code not in SUPPORTED_LANGUAGES:
60
- return None, f"❌ Language '{lang_code}' not supported.", "", "", None
61
 
62
- summary = summarize_text(transcript)
 
 
63
 
64
- if translate_to_english and lang_code != "en":
65
- translated_summary = translator(summary)[0]["translation_text"]
66
- else:
67
- translated_summary = summary
68
 
69
- os.remove(audio_file)
70
-
71
- summary_text = f"Transcript:\n{transcript}\n\nSummary:\n{translated_summary}"
72
-
73
- with open("summary.txt", "w", encoding="utf-8") as f:
74
- f.write(summary_text)
75
-
76
- return thumbnail_url, f"🗣️ Language: {SUPPORTED_LANGUAGES[lang_code]}", transcript, translated_summary, "summary.txt"
77
-
78
- except Exception as e:
79
- return None, f"❌ Error: {str(e)}", "", "", None
80
 
81
- with gr.Blocks(css="style.css") as demo:
82
- gr.Markdown("<h1 style='text-align: center;'>🎬 Multilingual YouTube Summarizer</h1>")
83
- gr.Markdown("Paste any YouTube video link, and get transcript + summary. Works for Hindi, Bengali, Tamil, Urdu, and more!")
84
 
85
  with gr.Row():
86
- youtube_url = gr.Textbox(label="YouTube Video URL")
87
- translate_check = gr.Checkbox(label="Translate Summary to English", value=True)
88
-
89
- thumbnail = gr.Image(label="Video Thumbnail", type="filepath")
90
- lang_out = gr.Text(label="Detected Language")
91
- transcript_out = gr.Textbox(label="Transcript", lines=8)
92
- summary_out = gr.Textbox(label="Summary", lines=6)
93
- download_btn = gr.File(label="Download .txt")
94
 
95
- btn = gr.Button("Generate Summary")
96
-
97
- btn.click(fn=transcribe_and_summarize,
98
- inputs=[youtube_url, translate_check],
99
- outputs=[thumbnail, lang_out, transcript_out, summary_out, download_btn])
 
 
 
 
 
 
 
100
 
101
  demo.launch()
 
 
 
1
  import os
2
+ import gradio as gr
3
  import yt_dlp
4
+ import whisper
5
+ from transformers import pipeline, MarianMTModel, MarianTokenizer
6
+ from pytube import YouTube
7
+ import torch
8
 
9
# Load the Whisper speech-to-text model. The "small" checkpoint is what is
# actually loaded here; pick a smaller checkpoint (e.g. "tiny" or "base") if
# CPU latency or memory becomes a problem.
whisper_model = whisper.load_model("small")

# Load the summarization pipeline. It is fed the English text produced by the
# translation step.
summarizer = pipeline("summarization", model="Falconsai/text_summarization")

# Load the translation model (multilingual to English). The tokenizer and the
# model must come from the same checkpoint, so the name is defined once.
TRANSLATION_CHECKPOINT = "Helsinki-NLP/opus-mt-mul-en"
translation_tokenizer = MarianTokenizer.from_pretrained(TRANSLATION_CHECKPOINT)
translation_model = MarianMTModel.from_pretrained(TRANSLATION_CHECKPOINT)
18
 
19
def download_audio(youtube_url):
    """Download the best available audio stream of a YouTube video.

    Args:
        youtube_url: URL of the video to fetch.

    Returns:
        Path of the downloaded audio file (always ``"audio.webm"``).

    Any error raised by yt-dlp while downloading propagates to the caller.
    """
    # NOTE(review): the name is fixed regardless of the container yt-dlp
    # actually selects for 'bestaudio/best' - confirm downstream consumers
    # do not rely on the extension.
    output_file = "audio.webm"
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_file,
        'quiet': True,
        # The output name is fixed, so a file left behind by an earlier
        # request would otherwise make yt-dlp skip the download, and the
        # caller would end up transcribing the previous video's audio.
        'overwrites': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([youtube_url])
    return output_file
 
 
 
 
 
 
 
 
29
 
30
def translate_to_english(text, max_words=100):
    """Translate ``text`` to English with the module-level Marian model.

    The tokenizer truncates every input to 512 tokens, so passing a whole
    transcript in one call silently drops everything after the first 512
    tokens. To keep the full text, the input is split on whitespace into
    chunks of at most ``max_words`` words, each chunk is translated
    separately, and the results are joined with single spaces.

    Args:
        text: Source text in any language supported by the checkpoint.
            ``None`` or whitespace-only input yields an empty string.
        max_words: Maximum number of whitespace-separated words per chunk.
            Text without whitespace forms a single chunk and is still
            subject to the 512-token truncation.

    Returns:
        The English translation, or ``""`` when there is nothing to translate.

    Raises:
        ValueError: if ``max_words`` is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")

    words = text.split() if text else []
    if not words:
        # Nothing to translate; avoid running the model on empty input.
        return ""

    translated_chunks = []
    for start in range(0, len(words), max_words):
        chunk = " ".join(words[start:start + max_words])
        # Truncation stays on as a backstop for unusually token-dense chunks.
        inputs = translation_tokenizer(
            chunk, return_tensors="pt", truncation=True, max_length=512
        )
        translated = translation_model.generate(**inputs, max_length=512)
        translated_chunks.append(
            translation_tokenizer.decode(translated[0], skip_special_tokens=True)
        )
    return " ".join(translated_chunks)
 
34
 
35
def process_video(url):
    """Run the download, transcribe, translate and summarize pipeline.

    Args:
        url: YouTube video URL entered by the user.

    Returns:
        A 5-tuple matching the Gradio outputs wired to this function:
        ``(transcription, translated_text, summary, thumbnail_url,
        summary_file)``. ``thumbnail_url`` is ``None`` when it cannot be
        resolved; ``summary_file`` is whatever ``download_summary`` returns
        for the summary, so the file output receives a path rather than the
        raw summary text.

    Raises:
        gr.Error: if ``url`` is empty.
    """
    if not url or not url.strip():
        raise gr.Error("Please enter a YouTube video URL.")
    url = url.strip()

    audio_path = download_audio(url)
    try:
        result = whisper_model.transcribe(audio_path)
    finally:
        # The audio is only needed for transcription; remove it so files do
        # not pile up. Cleanup is best-effort and must not mask a
        # transcription error.
        try:
            os.remove(audio_path)
        except OSError:
            pass
    transcription = result["text"]

    # Whisper reports the detected language; English transcripts do not need
    # to go through the multilingual-to-English translator.
    if result.get("language") == "en":
        translated_text = transcription
    else:
        translated_text = translate_to_english(transcription)

    # Summarize. truncation=True keeps long transcripts within the model's
    # maximum input length; an empty transcript is not sent to the model.
    if translated_text.strip():
        summary = summarizer(
            translated_text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
    else:
        summary = ""

    # Get thumbnail. It is cosmetic, so a lookup failure must not discard the
    # transcript and summary that were already computed.
    try:
        thumbnail_url = YouTube(url).thumbnail_url
    except Exception:
        thumbnail_url = None

    return transcription, translated_text, summary, thumbnail_url, download_summary(summary)
 
 
 
50
 
51
def download_summary(text):
    """Write the summary to ``summary.txt`` so it can be offered as a download.

    Args:
        text: Summary text to save.

    Returns:
        The file name ``"summary.txt"`` (relative to the working directory),
        or ``None`` when ``text`` is ``None`` or blank, so that no empty file
        is written or offered before a summary exists.
    """
    if text is None or not text.strip():
        return None

    filename = "summary.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)
    return filename
 
 
 
 
 
 
56
 
57
# Gradio UI: one input row (URL + submit button) and one results row.
# The title no longer mentions LLaMA: this file only loads Whisper, a
# summarization pipeline and a Marian translation model.
with gr.Blocks(theme=gr.themes.Soft(), title="🎥 YouTube Video Summarizer") as demo:
    gr.Markdown("## 🧠 Multilingual YouTube Summarizer")
    gr.Markdown("Upload a video link and get the transcript, English translation, and summary.")

    with gr.Row():
        youtube_input = gr.Text(label="YouTube Video URL", placeholder="https://www.youtube.com/watch?v=...")
        submit_btn = gr.Button("Transcribe & Summarize")

    with gr.Row():
        with gr.Column():
            transcript_output = gr.Textbox(label="🔊 Original Transcript", lines=10)
            translation_output = gr.Textbox(label="🌍 Translated to English", lines=10)
            summary_output = gr.Textbox(label="🧾 Summary", lines=10)
            download_btn = gr.Button("📥 Download Summary")
            download_file = gr.File(label="Download Link")
        # NOTE(review): the original nesting was lost; the thumbnail is
        # assumed to sit beside the text column - confirm the intended layout.
        video_thumb = gr.Image(label="🎞️ Video Thumbnail", width=256)

    # process_video returns five values, in the same order as these outputs.
    submit_btn.click(fn=process_video, inputs=[youtube_input],
                     outputs=[transcript_output, translation_output, summary_output, video_thumb, download_file])
    # Regenerate the downloadable file from the current summary text.
    download_btn.click(fn=download_summary, inputs=[summary_output], outputs=[download_file])

demo.launch()