JaganathC committed on
Commit
58ddc5a
·
verified ·
1 Parent(s): fff62d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -35
app.py CHANGED
@@ -7,7 +7,7 @@ import json
7
  import time
8
  import langdetect
9
  import uuid
10
- from transformers import AutoTokenizer, AutoModelForCausalLM
11
 
12
  # Load Hugging Face Token
13
  HF_TOKEN = os.getenv("HF_TOKEN")
@@ -15,12 +15,19 @@ HF_TOKEN = os.getenv("HF_TOKEN")
15
  print("Starting the program...")
16
  model_path = "Qwen/Qwen2.5-7B-Instruct"
17
 
18
- # Check if CUDA is available
 
 
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
20
  print(f"Using device: {device}")
21
 
22
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
23
- model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, trust_remote_code=True).to(device).eval()
 
 
 
 
 
24
  print("Model successfully loaded.")
25
 
26
  def generate_unique_filename(extension):
@@ -33,40 +40,42 @@ def cleanup_files(*files):
33
  print(f"Removed file: {file}")
34
 
35
  def download_youtube_audio(url):
 
36
  print(f"Downloading audio from YouTube: {url}")
37
  output_path = generate_unique_filename(".wav")
38
-
39
  ydl_opts = {
40
  'format': 'bestaudio/best',
41
  'postprocessors': [{
42
  'key': 'FFmpegExtractAudio',
43
  'preferredcodec': 'wav',
 
44
  }],
45
  'outtmpl': output_path,
46
  }
47
-
48
  try:
49
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
50
  ydl.download([url])
 
 
51
  except Exception as e:
52
  return f"Error downloading audio: {str(e)}"
53
 
54
- if os.path.exists(output_path + ".wav"):
55
- os.rename(output_path + ".wav", output_path)
56
-
57
- return output_path
58
 
59
  def transcribe_audio(file_path):
 
60
  print(f"Starting transcription of file: {file_path}")
61
  temp_audio = None
62
-
63
  if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
64
  print("Video file detected. Extracting audio using ffmpeg...")
65
  temp_audio = generate_unique_filename(".wav")
66
  command = ["ffmpeg", "-i", file_path, "-q:a", "0", "-map", "a", temp_audio]
67
  subprocess.run(command, check=True)
68
- file_path = temp_audio
69
-
70
  output_file = generate_unique_filename(".json")
71
  command = [
72
  "insanely-fast-whisper",
@@ -77,47 +86,61 @@ def transcribe_audio(file_path):
77
  "--timestamp", "chunk",
78
  "--transcript-path", output_file
79
  ]
80
-
81
  try:
82
  subprocess.run(command, check=True)
83
  except Exception as e:
84
  return f"Error in transcription: {str(e)}"
85
 
86
- with open(output_file, "r") as f:
87
- transcription = json.load(f)
88
-
89
- result = transcription.get("text", " ".join([chunk["text"] for chunk in transcription.get("chunks", [])]))
90
-
 
 
 
 
 
91
  cleanup_files(output_file)
92
  if temp_audio:
93
  cleanup_files(temp_audio)
94
 
95
- return result
96
 
97
  def generate_summary_stream(transcription):
98
- detected_language = langdetect.detect(transcription)
99
- prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:
100
- {transcription[:300000]}..."""
101
-
102
- input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
103
- output_ids = model.generate(input_ids, max_length=500)
104
- response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
105
-
106
- return response
 
 
 
 
 
 
 
107
 
108
  def process_youtube(url):
 
109
  if not url:
110
  return "Please enter a YouTube URL.", None
111
-
112
  audio_file = download_youtube_audio(url)
113
- if "Error" in audio_file:
114
  return audio_file, None
115
-
116
  transcription = transcribe_audio(audio_file)
117
- cleanup_files(audio_file)
118
  return transcription, None
119
 
120
  def process_uploaded_video(video_path):
 
121
  transcription = transcribe_audio(video_path)
122
  return transcription, None
123
 
@@ -126,7 +149,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
126
  # 🎥 Video Transcription and Smart Summary
127
  Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
128
  """)
129
-
130
  with gr.Tabs():
131
  with gr.TabItem("📤 Video Upload"):
132
  video_input = gr.Video()
@@ -135,11 +158,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
135
  with gr.TabItem("🔗 YouTube Link"):
136
  url_input = gr.Textbox(placeholder="https://www.youtube.com/watch?v=...")
137
  url_button = gr.Button("🚀 Process URL")
138
-
139
  transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
140
  summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
141
  summary_button = gr.Button("📝 Generate Summary")
142
-
143
  video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
144
  url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
145
  summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
 
7
  import time
8
  import langdetect
9
  import uuid
10
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
11
 
12
  # Load Hugging Face Token
13
  HF_TOKEN = os.getenv("HF_TOKEN")
 
15
  print("Starting the program...")
16
  model_path = "Qwen/Qwen2.5-7B-Instruct"
17
 
18
# **Efficient Model Loading**
# 8-bit bitsandbytes config; only applied below when a CUDA GPU is present.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)  # Use 8-bit precision to reduce memory usage

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

if device == "cuda":
    # Quantized weights are placed on the GPU by `device_map` while loading.
    # Do not call `.to(device)` afterwards: many transformers/bitsandbytes
    # versions raise for 8-bit models when `.to()` is used.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        quantization_config=bnb_config,  # Load in 8-bit to save memory
        device_map="auto",
        trust_remote_code=True,
    )
else:
    # bitsandbytes 8-bit quantization needs a GPU, so on CPU load the plain
    # float32 weights and move them explicitly.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float32,
        trust_remote_code=True,
    ).to(device)

model.eval()  # inference only
print("Model successfully loaded.")
32
 
33
  def generate_unique_filename(extension):
 
40
  print(f"Removed file: {file}")
41
 
42
  def download_youtube_audio(url):
43
+ """Downloads audio from a YouTube video and converts it to WAV format."""
44
  print(f"Downloading audio from YouTube: {url}")
45
  output_path = generate_unique_filename(".wav")
46
+
47
  ydl_opts = {
48
  'format': 'bestaudio/best',
49
  'postprocessors': [{
50
  'key': 'FFmpegExtractAudio',
51
  'preferredcodec': 'wav',
52
+ 'preferredquality': '192',
53
  }],
54
  'outtmpl': output_path,
55
  }
56
+
57
  try:
58
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
59
  ydl.download([url])
60
+ if os.path.exists(output_path + ".wav"):
61
+ os.rename(output_path + ".wav", output_path) # Ensure correct naming
62
  except Exception as e:
63
  return f"Error downloading audio: {str(e)}"
64
 
65
+ return output_path if os.path.exists(output_path) else "Download Failed"
 
 
 
66
 
67
  def transcribe_audio(file_path):
68
+ """Transcribes audio using `insanely-fast-whisper` and handles large files efficiently."""
69
  print(f"Starting transcription of file: {file_path}")
70
  temp_audio = None
71
+
72
  if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
73
  print("Video file detected. Extracting audio using ffmpeg...")
74
  temp_audio = generate_unique_filename(".wav")
75
  command = ["ffmpeg", "-i", file_path, "-q:a", "0", "-map", "a", temp_audio]
76
  subprocess.run(command, check=True)
77
+ file_path = temp_audio # Use extracted audio file
78
+
79
  output_file = generate_unique_filename(".json")
80
  command = [
81
  "insanely-fast-whisper",
 
86
  "--timestamp", "chunk",
87
  "--transcript-path", output_file
88
  ]
89
+
90
  try:
91
  subprocess.run(command, check=True)
92
  except Exception as e:
93
  return f"Error in transcription: {str(e)}"
94
 
95
+ # Process the JSON file in chunks to avoid memory overflow
96
+ result = []
97
+ try:
98
+ with open(output_file, "r") as f:
99
+ for line in f:
100
+ chunk = json.loads(line.strip()) # Read JSON line by line
101
+ result.append(chunk.get("text", ""))
102
+ except Exception as e:
103
+ return f"Error reading transcription file: {str(e)}"
104
+
105
  cleanup_files(output_file)
106
  if temp_audio:
107
  cleanup_files(temp_audio)
108
 
109
+ return " ".join(result)[:500000] # Limit transcription size
110
 
111
  def generate_summary_stream(transcription):
112
+ """Summarizes the transcription efficiently to avoid memory overflow."""
113
+ detected_language = langdetect.detect(transcription[:1000]) # Detect using a smaller portion
114
+
115
+ # Use smaller chunks for processing
116
+ chunk_size = 2000
117
+ transcript_chunks = [transcription[i:i+chunk_size] for i in range(0, len(transcription), chunk_size)]
118
+ summary_result = []
119
+
120
+ for chunk in transcript_chunks[:3]: # Process only the first 3 chunks to avoid OOM
121
+ prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:\n{chunk}"""
122
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
123
+ output_ids = model.generate(input_ids, max_length=300) # Limit output size
124
+ response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
125
+ summary_result.append(response)
126
+
127
+ return "\n\n".join(summary_result)
128
 
129
  def process_youtube(url):
130
+ """Handles YouTube video processing: downloads audio, transcribes it, and cleans up."""
131
  if not url:
132
  return "Please enter a YouTube URL.", None
133
+
134
  audio_file = download_youtube_audio(url)
135
+ if "Error" in audio_file or audio_file == "Download Failed":
136
  return audio_file, None
137
+
138
  transcription = transcribe_audio(audio_file)
139
+ cleanup_files(audio_file) # Clean up the downloaded file
140
  return transcription, None
141
 
142
  def process_uploaded_video(video_path):
143
+ """Processes uploaded video file for transcription."""
144
  transcription = transcribe_audio(video_path)
145
  return transcription, None
146
 
 
149
  # 🎥 Video Transcription and Smart Summary
150
  Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
151
  """)
152
+
153
  with gr.Tabs():
154
  with gr.TabItem("📤 Video Upload"):
155
  video_input = gr.Video()
 
158
  with gr.TabItem("🔗 YouTube Link"):
159
  url_input = gr.Textbox(placeholder="https://www.youtube.com/watch?v=...")
160
  url_button = gr.Button("🚀 Process URL")
161
+
162
  transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
163
  summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
164
  summary_button = gr.Button("📝 Generate Summary")
165
+
166
  video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
167
  url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
168
  summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])