JaganathC committed · verified
Commit e6813af · 1 Parent(s): a4ecef9

Update app.py
Files changed (1): app.py (+32 -14)
app.py CHANGED
@@ -4,21 +4,23 @@ import yt_dlp
 import os
 import subprocess
 import json
-from threading import Thread
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import spaces
 import time
 import langdetect
 import uuid
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-HF_TOKEN = os.environ.get("HF_TOKEN")
-print("Starting the program...")
+# Load Hugging Face Token
+HF_TOKEN = os.getenv("HF_TOKEN")
 
+print("Starting the program...")
 model_path = "Qwen/Qwen2.5-7B-Instruct"
-print(f"Loading model {model_path}...")
+
+# Check if CUDA is available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
-model = model.eval()
+model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, trust_remote_code=True).to(device).eval()
 print("Model successfully loaded.")
 
 def generate_unique_filename(extension):
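Note on the hunk above: both the old and the new loading code reference torch (torch.float16, torch.cuda.is_available()), but no import torch appears in the import block touched by this diff, so it presumably lives elsewhere in app.py. A minimal sketch of the device-aware loading pattern the new code uses, with that import made explicit (model name and flags come from the diff; the dtype variable is only for readability):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = "Qwen/Qwen2.5-7B-Instruct"

# Pick GPU when available, otherwise fall back to CPU and full precision.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = (
    AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, trust_remote_code=True)
    .to(device)
    .eval()
)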
@@ -33,6 +35,7 @@ def cleanup_files(*files):
 def download_youtube_audio(url):
     print(f"Downloading audio from YouTube: {url}")
     output_path = generate_unique_filename(".wav")
+
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
@@ -40,10 +43,13 @@ def download_youtube_audio(url):
             'preferredcodec': 'wav',
         }],
         'outtmpl': output_path,
-        'keepvideo': True,
     }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        ydl.download([url])
+
+    try:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+    except Exception as e:
+        return f"Error downloading audio: {str(e)}"
 
     if os.path.exists(output_path + ".wav"):
         os.rename(output_path + ".wav", output_path)
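With the change above, download_youtube_audio no longer raises when yt-dlp fails; it returns an error string instead, and callers are expected to check for it (the later process_youtube hunk does exactly that). A small usage sketch under that assumption; the URL is a placeholder:

result = download_youtube_audio("https://www.youtube.com/watch?v=PLACEHOLDER")

if "Error" in result:
    # yt-dlp failed; `result` carries the message built in the except branch.
    print(result)
else:
    # Download succeeded; `result` should be the path to the extracted .wav file
    # (the function's final return statement is outside the hunks shown here).
    print(f"Audio saved to {result}")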
@@ -53,6 +59,7 @@ def download_youtube_audio(url):
 def transcribe_audio(file_path):
     print(f"Starting transcription of file: {file_path}")
     temp_audio = None
+
     if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
         print("Video file detected. Extracting audio using ffmpeg...")
         temp_audio = generate_unique_filename(".wav")
@@ -70,7 +77,11 @@ def transcribe_audio(file_path):
         "--timestamp", "chunk",
         "--transcript-path", output_file
     ]
-    subprocess.run(command, check=True)
+
+    try:
+        subprocess.run(command, check=True)
+    except Exception as e:
+        return f"Error in transcription: {str(e)}"
 
     with open(output_file, "r") as f:
         transcription = json.load(f)
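The try/except added above catches any Exception and discards the subprocess's own output. A possible refinement, not part of this commit, is to catch subprocess.CalledProcessError and capture the CLI's stderr so the returned message says what actually went wrong. A sketch with a hypothetical helper run_transcription; command is assumed to be the same argument list transcribe_audio builds:

import subprocess

def run_transcription(command):
    # Hypothetical wrapper around the same CLI call made in transcribe_audio.
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        # e.stderr holds whatever the transcription CLI printed before exiting non-zero.
        return f"Error in transcription: {e.stderr or e}"
    except FileNotFoundError as e:
        # Raised when the CLI binary itself is not installed or not on PATH.
        return f"Error in transcription: {e}"
    return None  # success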
@@ -88,13 +99,20 @@ def generate_summary_stream(transcription):
     prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:
     {transcription[:300000]}..."""
 
-    response, history = model.chat(tokenizer, prompt, history=[])
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+    output_ids = model.generate(input_ids, max_length=500)
+    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
     return response
 
 def process_youtube(url):
     if not url:
         return "Please enter a YouTube URL.", None
+
     audio_file = download_youtube_audio(url)
+    if "Error" in audio_file:
+        return audio_file, None
+
     transcription = transcribe_audio(audio_file)
     cleanup_files(audio_file)
     return transcription, None
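One thing to watch in the generation change above: max_length=500 bounds prompt plus completion, so a long transcription prompt can leave little or no room for the summary, whereas max_new_tokens bounds only the generated continuation. Qwen2.5-7B-Instruct is also a chat-tuned checkpoint, so routing the prompt through the tokenizer's chat template is the usual pattern. A hedged sketch of that variant (not what this commit does; prompt, tokenizer, model and device are the objects from the diff):

# Sketch: wrap the summarization prompt as a chat turn and bound only the new tokens.
messages = [{"role": "user", "content": prompt}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(device)

output_ids = model.generate(input_ids, max_new_tokens=500)
# Decode only what was generated after the prompt.
response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)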
@@ -126,4 +144,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
     summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
 
-demo.launch()
+demo.launch()