cfc-tech committed on
Commit dec5a5d · verified · 1 Parent(s): a5434c3
Files changed (1)
  1. app.py +36 -49
app.py CHANGED
@@ -1,57 +1,44 @@
  import gradio as gr
  from pytube import YouTube
  import subprocess
- from huggingsound import SpeechRecognitionModel
  import torch
  from transformers import pipeline

- def process_video(video_url):
-     response = {
-         'status': 'Success',
-         'message': '',
-         'data': ''
-     }
-
-     try:
-         yt = YouTube(video_url)
-         audio_file = yt.streams.filter(only_audio=True, file_extension='mp4').first().download(filename='ytaudio.mp4')
-         subprocess.run(['ffmpeg', '-i', 'ytaudio.mp4', '-acodec', 'pcm_s16le', '-ar', '16000', 'ytaudio.wav'], check=True)
-     except Exception as e:
-         response['status'] = 'Error'
-         response['message'] = f'Failed to download and convert video: {str(e)}'
-         return response
-
-     try:
-         device = "cuda" if torch.cuda.is_available() else "cpu"
-         model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english", device=device)
-         transcription = model.transcribe(['ytaudio.wav'])[0]['transcription']
-     except Exception as e:
-         response['status'] = 'Error'
-         response['message'] = f'Failed during speech recognition: {str(e)}'
-         return response
-
-     try:
-         summarization = pipeline('summarization')
-         summarized_text = summarization(transcription, max_length=130, min_length=30, do_sample=False)
-         response['data'] = summarized_text[0]['summary_text']
-     except Exception as e:
-         response['status'] = 'Error'
-         response['message'] = f'Failed during summarization: {str(e)}'
-         return response
-
-     return response

- iface = gr.Interface(
-     fn=process_video,
-     inputs=gr.inputs.Textbox(lines=2, placeholder="Enter YouTube Video URL Here..."),
-     outputs=[
-         gr.outputs.Textbox(label="Status"),
-         gr.outputs.Textbox(label="Message"),
-         gr.outputs.Textbox(label="Summary")
-     ],
-     title="YouTube Video Summarizer",
-     description="This tool extracts audio from a YouTube video, transcribes it, and provides a summary.",
-     enable_queue=True # Enable request queuing
- )

- iface.launch()
 
 
  import gradio as gr
  from pytube import YouTube
  import subprocess
  import torch
+ from huggingsound import SpeechRecognitionModel
+ import librosa
+ import soundfile as sf
  from transformers import pipeline

+ def summarize_video(youtube_link):
+     # Download YouTube video's audio
+     yt = YouTube(youtube_link)
+     yt.streams.filter(only_audio=True, file_extension='mp4').first().download(filename='ytaudio.mp4')
+
+     # Convert to WAV format
+     subprocess.run(['ffmpeg', '-i', 'ytaudio.mp4', '-acodec', 'pcm_s16le', '-ar', '16000', 'ytaudio.wav'], check=True)
+
+     # Initialize speech recognition model
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english", device=device)
+
+     # Process audio file and transcribe
+     input_file = 'ytaudio.wav'
+     stream = librosa.stream(input_file, block_length=30, frame_length=16000, hop_length=16000)
+     full_transcript = ''
+     for i, speech in enumerate(stream):
+         sf.write(f'{i}.wav', speech, 16000)
+         transcription = model.transcribe([f'{i}.wav'])
+         full_transcript += ' '.join([item['transcription'] for item in transcription])
+
+     # Summarize the transcript
+     summarizer = pipeline('summarization')
+     summarized_text = summarizer(full_transcript, max_length=130, min_length=30, do_sample=False)
+     return summarized_text[0]['summary_text']

+ # Set up the Gradio interface
+ iface = gr.Interface(fn=summarize_video,
+                      inputs=gr.inputs.Textbox(lines=2, placeholder="Enter YouTube Video Link Here..."),
+                      outputs="text",
+                      title="YouTube Video Text Summarizer",
+                      description="This tool summarizes the text extracted from a given YouTube video. Please enter the video link below.")

+ if __name__ == "__main__":
+     iface.launch()
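
For a quick local check without launching the web UI, the new summarize_video function can be imported and called directly. A minimal sketch, assuming the dependencies above are installed, ffmpeg is on PATH, and the video URL is a placeholder (not part of this commit):

# local_check.py (illustrative only; importing app builds the Gradio interface
# but does not launch it, since launch() is guarded by __main__)
from app import summarize_video

if __name__ == "__main__":
    video_url = "https://www.youtube.com/watch?v=VIDEO_ID"  # replace with a real link
    print(summarize_video(video_url))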