Spaces: samir-fama/youtube-video-transcriptor

Running

App Files Community

samir-fama committed on Dec 26, 2023

Commit

d9019d1

1 Parent(s): fa23a9e

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -18

app.py CHANGED Viewed

@@ -9,17 +9,17 @@ import os
 import re
-model = whisper.load_model("base")
-# def compress_audio(file_path, bitrate='32k'):
-#     try:
-#         audio = AudioSegment.from_file(file_path)
-#         output_format = os.path.splitext(file_path)[1][1:]
-#         compressed_audio = audio.export(file_path, format=output_format, bitrate=bitrate)
-#         return True
-#     except Exception as e:
-#         print(f"Error: {e}")
-#         return False
 def url_to_text(url):
     if url != '':
@@ -35,7 +35,7 @@ def url_to_text(url):
         base, ext = os.path.splitext(out_file)
         os.rename(out_file, base+'.mp3')
         file_path = base+'.mp3'
-        # compress_audio(file_path)
         result = model.transcribe(file_path)
         return result['text'].strip()
@@ -49,14 +49,14 @@ def get_summary(article):
     return b
 with gr.Blocks() as demo:
-    gr.Markdown("<center><h1>Samir's AI Model Implementation -  Automatic Speech Recognition</h1></center>")
-    gr.Markdown("<center><h2>YouTube Audio AutoTranscribe: Effortless Transcription</h2></center>")
-    gr.Markdown("<center><b>This application is using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a>. Whisper is an intricately designed <br>neural network aiming to achieve the highest precision in the field of multilingual speech recognition.</b></center>")
-    gr.Markdown("<center><b>The time for the model to perform transcription typically takes around 10 seconds for every 1 minute of video. <br>For example, a 12-minute video would take approximately 120 seconds to transcribe the audio content.</b></center>")
-    input_text_url = gr.Textbox(placeholder='👇Youtube Video URL👇', label='YouTube URL')
     result_button_transcribe = gr.Button('Transcribe Now')
-    output_text_transcribe = gr.Textbox(placeholder='Transcription of the YouTube video.', label='Transcript')
     result_button_transcribe.click(url_to_text, inputs = input_text_url, outputs = output_text_transcribe)

 import re
+model = whisper.load_model("tiny")
+def compress_audio(file_path, bitrate='32k'):
+    try:
+        audio = AudioSegment.from_file(file_path)
+        output_format = os.path.splitext(file_path)[1][1:]
+        compressed_audio = audio.export(file_path, format=output_format, bitrate=bitrate)
+        return True
+    except Exception as e:
+        print(f"Error: {e}")
+        return False
 def url_to_text(url):
     if url != '':
         base, ext = os.path.splitext(out_file)
         os.rename(out_file, base+'.mp3')
         file_path = base+'.mp3'
+        compress_audio(file_path)
         result = model.transcribe(file_path)
         return result['text'].strip()
     return b
 with gr.Blocks() as demo:
+    gr.Markdown("<h1>Samir's AI Model Implementation -  Automatic Speech Recognition</h1>")
+    gr.Markdown("<h2>YouTube Audio AutoTranscribe: Effortless Transcription</h2>")
+    gr.Markdown("<b>This application is using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a>. Whisper is an intricately designed <br>neural network aiming to achieve the highest precision in the field of multilingual speech recognition.</b>")
+    gr.Markdown("<b>The time for the model to perform transcription typically takes around 10 seconds for every 1 minute of video. <br>For example, a 12-minute video would take approximately 120 seconds to transcribe the audio content.</b>")
+    input_text_url = gr.Textbox(placeholder='Youtube Video URL', label='👇YouTube URL👇')
     result_button_transcribe = gr.Button('Transcribe Now')
+    output_text_transcribe = gr.Textbox(placeholder='Transcription of the YouTube video.', label='👇Transcription👇')
     result_button_transcribe.click(url_to_text, inputs = input_text_url, outputs = output_text_transcribe)