samir-fama committed
Commit 859958b
1 Parent(s): 7c08a77

Create app.py

Files changed (1)
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
+ import whisper
+ from pytube import YouTube
+ from pydub import AudioSegment
+ import gradio as gr
+ import os
+ import re
+
+ model = whisper.load_model("small")
+
+ # def compress_audio(file_path, bitrate='32k'):
+ #     try:
+ #         audio = AudioSegment.from_file(file_path)
+ #         output_format = os.path.splitext(file_path)[1][1:]
+ #         compressed_audio = audio.export(file_path, format=output_format, bitrate=bitrate)
+ #         return True
+ #     except Exception as e:
+ #         print(f"Error: {e}")
+ #         return False
+
+ def url_to_text(url):
+     if url != '':
+         output_text_transcribe = ''
+
+         # Download the audio-only stream of the YouTube video.
+         yt = YouTube(url)
+         video = yt.streams.filter(only_audio=True).first()
+         out_file = video.download(output_path=".")
+         file_stats = os.stat(out_file)
+
+         # Only transcribe downloads up to 30 MB to keep runtime bounded.
+         if file_stats.st_size <= 30_000_000:
+
+             base, ext = os.path.splitext(out_file)
+             os.rename(out_file, base + '.mp3')
+             file_path = base + '.mp3'
+             # compress_audio(file_path)
+
+             result = model.transcribe(file_path)
+             return result['text'].strip()
+         else:
+             raise gr.Error("The downloaded audio exceeds 30 MB; please try a shorter video.")
+
+ def get_summary(article):
+     # Keep only the first five sentences so the input stays short.
+     first_sentences = ' '.join(re.split(r'(?<=[.:;])\s', article)[:5])
+     # NOTE: `summarizer` is never defined or imported in this file, so calling
+     # get_summary as committed raises a NameError (see the sketch after the diff).
+     b = summarizer(first_sentences, min_length=20, max_length=120, do_sample=False)
+     b = b[0]['summary_text'].replace(' .', '.').strip()
+     return b
+
+ with gr.Blocks() as demo:
+     gr.Markdown("<h1><center>Samir's AI Model Implementation - Automatic Speech Recognition</center></h1>")
+     gr.Markdown("<h2><center>YouTube Audio AutoTranscribe: Effortless Transcription</center></h2>")
+     gr.Markdown("<center><b>This application uses <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a>, a neural network <br>trained for robust multilingual speech recognition.</b></center>")
+     gr.Markdown("<center><b>Transcription typically takes around 10 seconds per minute of video; <br>for example, a 12-minute video takes approximately 120 seconds to transcribe.</b></center>")
+
+     input_text_url = gr.Textbox(placeholder='👇Youtube Video URL👇', label='YouTube URL')
+     result_button_transcribe = gr.Button('Transcribe Now')
+     output_text_transcribe = gr.Textbox(placeholder='Transcription of the YouTube video.', label='Transcript')
+
+     result_button_transcribe.click(url_to_text, inputs=input_text_url, outputs=output_text_transcribe)
+
+ demo.queue(default_enabled=True).launch(debug=True)
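Note: the committed file calls `summarizer` inside `get_summary` without ever defining or importing it, and the function is not wired into the Gradio UI. A minimal sketch of how such a summarizer could be supplied is below; it assumes the Hugging Face `transformers` library and the `sshleifer/distilbart-cnn-12-6` summarization checkpoint, neither of which is part of this commit.

# Sketch only: NOT part of the committed app.py.
# Assumes `transformers` is installed; any summarization pipeline with the
# same call signature (text, min_length, max_length, do_sample) would work.
from transformers import pipeline

summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",  # assumed checkpoint, not from the commit
)

# With this in scope, get_summary(article) returns a short summary string, e.g.:
#     summary = get_summary(url_to_text("https://www.youtube.com/watch?v=..."))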