samir-fama committed on
Commit
d9019d1
·
1 Parent(s): fa23a9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -18
app.py CHANGED
@@ -9,17 +9,17 @@ import os
9
  import re
10
 
11
 
12
- model = whisper.load_model("base")
13
-
14
- # def compress_audio(file_path, bitrate='32k'):
15
- # try:
16
- # audio = AudioSegment.from_file(file_path)
17
- # output_format = os.path.splitext(file_path)[1][1:]
18
- # compressed_audio = audio.export(file_path, format=output_format, bitrate=bitrate)
19
- # return True
20
- # except Exception as e:
21
- # print(f"Error: {e}")
22
- # return False
23
 
24
  def url_to_text(url):
25
  if url != '':
@@ -35,7 +35,7 @@ def url_to_text(url):
35
  base, ext = os.path.splitext(out_file)
36
  os.rename(out_file, base+'.mp3')
37
  file_path = base+'.mp3'
38
- # compress_audio(file_path)
39
 
40
  result = model.transcribe(file_path)
41
  return result['text'].strip()
@@ -49,14 +49,14 @@ def get_summary(article):
49
  return b
50
 
51
  with gr.Blocks() as demo:
52
- gr.Markdown("<center><h1>Samir's AI Model Implementation - Automatic Speech Recognition</h1></center>")
53
- gr.Markdown("<center><h2>YouTube Audio AutoTranscribe: Effortless Transcription</h2></center>")
54
- gr.Markdown("<center><b>This application is using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a>. Whisper is an intricately designed <br>neural network aiming to achieve the highest precision in the field of multilingual speech recognition.</b></center>")
55
- gr.Markdown("<center><b>The time for the model to perform transcription typically takes around 10 seconds for every 1 minute of video. <br>For example, a 12-minute video would take approximately 120 seconds to transcribe the audio content.</b></center>")
56
 
57
- input_text_url = gr.Textbox(placeholder='👇Youtube Video URL👇', label='YouTube URL')
58
  result_button_transcribe = gr.Button('Transcribe Now')
59
- output_text_transcribe = gr.Textbox(placeholder='Transcription of the YouTube video.', label='Transcript')
60
 
61
  result_button_transcribe.click(url_to_text, inputs = input_text_url, outputs = output_text_transcribe)
62
 
 
9
  import re
10
 
11
 
12
+ model = whisper.load_model("tiny")
13
+
14
+ def compress_audio(file_path, bitrate='32k'):
15
+ try:
16
+ audio = AudioSegment.from_file(file_path)
17
+ output_format = os.path.splitext(file_path)[1][1:]
18
+ compressed_audio = audio.export(file_path, format=output_format, bitrate=bitrate)
19
+ return True
20
+ except Exception as e:
21
+ print(f"Error: {e}")
22
+ return False
23
 
24
  def url_to_text(url):
25
  if url != '':
 
35
  base, ext = os.path.splitext(out_file)
36
  os.rename(out_file, base+'.mp3')
37
  file_path = base+'.mp3'
38
+ compress_audio(file_path)
39
 
40
  result = model.transcribe(file_path)
41
  return result['text'].strip()
 
49
  return b
50
 
51
  with gr.Blocks() as demo:
52
+ gr.Markdown("<h1>Samir's AI Model Implementation - Automatic Speech Recognition</h1>")
53
+ gr.Markdown("<h2>YouTube Audio AutoTranscribe: Effortless Transcription</h2>")
54
+ gr.Markdown("<b>This application is using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a>. Whisper is an intricately designed <br>neural network aiming to achieve the highest precision in the field of multilingual speech recognition.</b>")
55
+ gr.Markdown("<b>The time for the model to perform transcription typically takes around 10 seconds for every 1 minute of video. <br>For example, a 12-minute video would take approximately 120 seconds to transcribe the audio content.</b>")
56
 
57
+ input_text_url = gr.Textbox(placeholder='Youtube Video URL', label='👇YouTube URL👇')
58
  result_button_transcribe = gr.Button('Transcribe Now')
59
+ output_text_transcribe = gr.Textbox(placeholder='Transcription of the YouTube video.', label='👇Transcription👇')
60
 
61
  result_button_transcribe.click(url_to_text, inputs = input_text_url, outputs = output_text_transcribe)
62