cstr committed on
Commit
a96aeb1
·
verified ·
1 Parent(s): f2e0bcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -63
app.py CHANGED
@@ -10,7 +10,7 @@ from urllib.parse import urlparse
10
  # Clone and install faster-whisper from GitHub
11
  subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
12
  subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
13
- subprocess.run(["pip", "install", "yt-dlp"], check=True)
14
 
15
  # Add the faster-whisper directory to the Python path
16
  sys.path.append("./faster-whisper")
@@ -21,71 +21,134 @@ import yt_dlp
21
 
22
  def download_audio(url):
23
  parsed_url = urlparse(url)
24
- if parsed_url.netloc == 'www.youtube.com' or parsed_url.netloc == 'youtu.be':
25
- # YouTube video
26
- ydl_opts = {
27
- 'format': 'bestaudio/best',
28
- 'postprocessors': [{
29
- 'key': 'FFmpegExtractAudio',
30
- 'preferredcodec': 'mp3',
31
- 'preferredquality': '192',
32
- }],
33
- 'outtmpl': '%(id)s.%(ext)s',
34
- }
35
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
36
- info = ydl.extract_info(url, download=True)
37
- return f"{info['id']}.mp3"
38
  else:
39
- # Direct MP3 URL
40
- response = requests.get(url)
41
- if response.status_code == 200:
42
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
43
- temp_file.write(response.content)
44
- return temp_file.name
45
- else:
46
- raise Exception(f"Failed to download audio from {url}")
47
 
48
- def transcribe_audio(input_source, batch_size):
49
- # Initialize the model
50
- model = WhisperModel("cstr/whisper-large-v3-turbo-int8_float32", device="auto", compute_type="int8")
51
- batched_model = BatchedInferencePipeline(model=model)
52
-
53
- # Handle input source
54
- if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
55
- # It's a URL, download the audio
56
- audio_path = download_audio(input_source)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  else:
58
- # It's a local file path
59
- audio_path = input_source
60
-
61
- # Benchmark transcription time
62
- start_time = time.time()
63
- segments, info = batched_model.transcribe(audio_path, batch_size=batch_size)
64
- end_time = time.time()
65
-
66
- # Generate transcription
67
- transcription = ""
68
- for segment in segments:
69
- transcription += f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
70
-
71
- # Calculate metrics
72
- transcription_time = end_time - start_time
73
- real_time_factor = info.duration / transcription_time
74
- audio_file_size = os.path.getsize(audio_path) / (1024 * 1024) # Size in MB
75
-
76
- # Prepare output
77
- output = f"Transcription:\n\n{transcription}\n"
78
- output += f"\nLanguage: {info.language}, Probability: {info.language_probability:.2f}\n"
79
- output += f"Duration: {info.duration:.2f}s, Duration after VAD: {info.duration_after_vad:.2f}s\n"
80
- output += f"Transcription time: {transcription_time:.2f} seconds\n"
81
- output += f"Real-time factor: {real_time_factor:.2f}x\n"
82
- output += f"Audio file size: {audio_file_size:.2f} MB"
83
-
84
- # Clean up downloaded file if it was a URL
85
- if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
86
- os.remove(audio_path)
87
-
88
- return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  # Gradio interface
91
  iface = gr.Interface(
@@ -102,6 +165,7 @@ iface = gr.Interface(
102
  ["https://example.com/path/to/audio.mp3", 16],
103
  ["path/to/local/audio.mp3", 16]
104
  ],
 
105
  )
106
 
107
  iface.launch()
 
10
  # Clone and install faster-whisper from GitHub
11
  subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
12
  subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
13
+ subprocess.run(["pip", "install", "yt-dlp pytube ffmpeg-python"], check=True)
14
 
15
  # Add the faster-whisper directory to the Python path
16
  sys.path.append("./faster-whisper")
 
21
 
22
  def download_audio(url):
23
  parsed_url = urlparse(url)
24
+ if parsed_url.netloc in ['www.youtube.com', 'youtu.be', 'youtube.com']:
25
+ return download_youtube_audio(url)
 
 
 
 
 
 
 
 
 
 
 
 
26
  else:
27
+ return download_direct_audio(url)
 
 
 
 
 
 
 
28
 
29
def download_youtube_audio(url):
    """Fetch the audio of a YouTube video, trying several strategies in order.

    Each strategy is called with *url*; the first one that returns without
    raising wins. A failing strategy is reported on stdout and the next one
    is tried.

    Args:
        url: URL of the YouTube video.

    Returns:
        The path returned by the first strategy that succeeds.

    Raises:
        Exception: If every strategy raised. The last underlying error is
            attached as ``__cause__`` so the traceback shows why.
    """
    methods = [
        youtube_dl_method,
        pytube_method,
        youtube_dl_alternative_method,
        ffmpeg_method,
    ]

    last_error = None
    for method in methods:
        try:
            return method(url)
        except Exception as e:
            last_error = e
            print(f"Method {method.__name__} failed: {str(e)}")

    raise Exception(
        "All download methods failed. Please try a different video or a direct audio URL."
    ) from last_error
44
+
45
def youtube_dl_method(url):
    """Download the best available audio track with yt-dlp and convert it to MP3.

    The output template writes the file relative to the current working
    directory, named after the video id.

    Args:
        url: URL of the video to download.

    Returns:
        The relative path ``"<video id>.mp3"``.

    Raises:
        Exception: Whatever yt-dlp raises when extraction, download or the
            FFmpeg post-processing step fails.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': '%(id)s.%(ext)s',
        # A watch URL that also carries a ``list=`` parameter would otherwise
        # be expanded into the whole playlist, and info['id'] would then be
        # the playlist id, for which no "<id>.mp3" file exists.
        'noplaylist': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
    return f"{info['id']}.mp3"
58
+
59
def pytube_method(url):
    """Download the first audio-only stream of a video with pytube.

    The downloaded file is only renamed to carry an ``.mp3`` extension; it is
    not transcoded, so its contents stay in whatever container pytube
    delivered.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the renamed file.

    Raises:
        Exception: If the video exposes no audio-only stream, or if pytube or
            the rename fails.
    """
    # Imported here rather than at module level, as in the original code.
    from pytube import YouTube

    yt = YouTube(url)
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        # Without this check the failure surfaces as an opaque
        # "'NoneType' object has no attribute 'download'".
        raise Exception("pytube found no audio-only stream for this video")

    out_file = audio_stream.download()
    base, _ext = os.path.splitext(out_file)
    new_file = base + '.mp3'
    # os.replace overwrites a leftover file from an earlier run on every
    # platform; os.rename raises on Windows when the target already exists.
    os.replace(out_file, new_file)
    return new_file
68
+
69
def youtube_dl_alternative_method(url):
    """Retry the yt-dlp download with quieter, more permissive options.

    Uses the same format, post-processing and output template as the primary
    yt-dlp strategy, but silences output and relaxes transport security.

    Args:
        url: URL of the video to download.

    Returns:
        The relative path ``"<video id>.mp3"``.

    Raises:
        Exception: Whatever yt-dlp raises when extraction, download or the
            FFmpeg post-processing step fails.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': '%(id)s.%(ext)s',
        'no_warnings': True,
        'quiet': True,
        # SECURITY NOTE(review): the next two options disable TLS certificate
        # verification and prefer plain HTTP. Kept because this is a
        # last-resort fallback, but the download can then be tampered with
        # in transit -- confirm this trade-off is intended.
        # ('no_check_certificate' was dropped: the YoutubeDL option is spelled
        # 'nocheckcertificate', so the other spelling was a dead key.)
        'prefer_insecure': True,
        'nocheckcertificate': True,
        # Keep a watch URL with a ``list=`` parameter from expanding into the
        # whole playlist, which would make info['id'] the playlist id.
        'noplaylist': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
    return f"{info['id']}.mp3"
87
+
88
def ffmpeg_method(url):
    """Let the ``ffmpeg`` executable read *url* and encode its audio as MP3.

    Args:
        url: Input location passed to ``ffmpeg -i``.

    Returns:
        Path of the temporary ``.mp3`` file that ffmpeg wrote.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be started.
    """
    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before ffmpeg opens it.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)

    # '-y' is needed because the output file now already exists (empty).
    command = ['ffmpeg', '-y', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
    try:
        subprocess.run(command, check=True, capture_output=True)
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        try:
            os.remove(output_file)
        except OSError:
            pass
        raise
    return output_file
93
+
94
def download_direct_audio(url):
    """Download *url* into a temporary ``.mp3`` file and return its path.

    Args:
        url: Direct link to the audio file.

    Returns:
        Path of the temporary file. It is created with ``delete=False``, so
        the caller is responsible for removing it.

    Raises:
        Exception: If the server answers with a status other than 200.
        requests.RequestException: On connection errors or timeouts.
    """
    # A timeout keeps an unresponsive server from blocking the request
    # forever; streaming avoids holding the whole file in memory.
    with requests.get(url, stream=True, timeout=30) as response:
        if response.status_code != 200:
            raise Exception(f"Failed to download audio from {url}")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                temp_file.write(chunk)
            return temp_file.name
102
+
103
def transcribe_audio(input_source, batch_size):
    """Transcribe an audio file or URL and return a text report.

    Args:
        input_source: Either a local file path or an ``http(s)://`` URL. URLs
            are downloaded first and the downloaded file is removed afterwards.
        batch_size: Batch size forwarded to the batched inference pipeline.

    Returns:
        A string holding the timestamped transcription followed by language,
        duration, timing and file-size metrics. If anything fails, the string
        ``"An error occurred: <message>"`` is returned instead; no exception
        propagates to the caller.
    """
    is_url = isinstance(input_source, str) and input_source.startswith(('http://', 'https://'))
    # Bound up front so the cleanup in ``finally`` is well defined even when
    # the model load or the download fails before a path exists.
    audio_path = None

    try:
        # Initialize the model
        # NOTE(review): the model is constructed on every call; consider
        # caching it if start-up cost matters.
        model = WhisperModel("cstr/whisper-large-v3-turbo-int8_float32", device="auto", compute_type="int8")
        batched_model = BatchedInferencePipeline(model=model)

        # Handle input source: download URLs, use local paths as they are
        if is_url:
            audio_path = download_audio(input_source)
        else:
            audio_path = input_source

        # Benchmark transcription time. ``transcribe`` hands back a lazy
        # generator of segments, so the decoding only runs while it is being
        # iterated -- the segments must be consumed before the clock stops.
        start_time = time.time()
        segments, info = batched_model.transcribe(audio_path, batch_size=batch_size)
        lines = [
            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
            for segment in segments
        ]
        end_time = time.time()

        # Generate transcription
        transcription = "".join(lines)

        # Calculate metrics
        transcription_time = end_time - start_time
        if transcription_time > 0:
            real_time_factor = info.duration / transcription_time
        else:
            # Guard against a zero interval from a coarse clock.
            real_time_factor = float("inf")
        audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)  # Size in MB

        # Prepare output
        output = f"Transcription:\n\n{transcription}\n"
        output += f"\nLanguage: {info.language}, Probability: {info.language_probability:.2f}\n"
        output += f"Duration: {info.duration:.2f}s, Duration after VAD: {info.duration_after_vad:.2f}s\n"
        output += f"Transcription time: {transcription_time:.2f} seconds\n"
        output += f"Real-time factor: {real_time_factor:.2f}x\n"
        output += f"Audio file size: {audio_file_size:.2f} MB"

        return output

    except Exception as e:
        return f"An error occurred: {str(e)}"

    finally:
        # Clean up the downloaded file if the input was a URL. Removal stays
        # best-effort, but only filesystem errors are ignored.
        if is_url and audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
152
 
153
  # Gradio interface
154
  iface = gr.Interface(
 
165
  ["https://example.com/path/to/audio.mp3", 16],
166
  ["path/to/local/audio.mp3", 16]
167
  ],
168
+ cache_examples=False # Prevents automatic processing of examples
169
  )
170
 
171
  iface.launch()