Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import moviepy.editor as mp
|
5 |
+
from transformers import pipeline
|
6 |
+
import time
|
7 |
+
|
8 |
+
# Load both models once at import time so every request reuses them.
# Speech-to-text: the "base" Whisper checkpoint (chosen as a smaller model).
whisper = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
)
# Text summarization: BART fine-tuned on CNN/DailyMail.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
|
11 |
+
|
12 |
+
def process_video(video_path):
    """Transcribe the audio track of a video and summarize the transcript.

    The audio is extracted to a temporary WAV file, transcribed with the
    module-level ``whisper`` pipeline, and the transcript is condensed with
    the module-level ``summarizer`` pipeline. The duration of each stage is
    printed to stdout.

    Args:
        video_path: Path of the video file to process. ``None`` or an empty
            string (no upload) is reported as an error.

    Returns:
        A ``(transcription, summary)`` tuple of strings. If any stage fails,
        the tuple is ``(error message, "")`` so the message is shown in the
        first output field instead of raising.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import os
    import tempfile

    if not video_path:
        return "No video file was provided.", ""

    audio_path = None
    try:
        # Extract audio from video
        start_time = time.perf_counter()
        # A unique temp file per call: a fixed filename would be overwritten
        # by concurrent requests and left behind on disk.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        video_clip = mp.VideoFileClip(video_path)
        try:
            if video_clip.audio is None:
                raise ValueError("The uploaded video has no audio track.")
            video_clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
        finally:
            # Release the ffmpeg reader and file handles held by the clip.
            video_clip.close()
        print(f"Audio extraction took {time.perf_counter() - start_time:.2f} seconds")

        # Transcribe audio to text
        start_time = time.perf_counter()
        # Whisper handles 30-second windows; chunking lets the pipeline cover
        # recordings longer than a single window.
        transcription = whisper(audio_path, chunk_length_s=30)
        text = transcription['text']
        print(f"Transcription took {time.perf_counter() - start_time:.2f} seconds")
        if not text:
            raise ValueError("Transcription returned empty text.")

        # Summarize text
        start_time = time.perf_counter()
        # truncation=True clips transcripts that exceed the model's maximum
        # input length instead of failing inside the model.
        summary = summarizer(
            text,
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        summary_text = summary[0]['summary_text']
        print(f"Summarization took {time.perf_counter() - start_time:.2f} seconds")
        if not summary_text:
            raise ValueError("Summarization returned empty text.")

        return text, summary_text

    except Exception as e:
        # UI boundary: surface the failure to the user as text rather than
        # letting the request crash.
        return str(e), ""
    finally:
        # Best-effort cleanup of the temporary audio file.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
|
41 |
+
|
42 |
+
# Gradio Interface: one video input, two text outputs that line up with the
# (transcription, summary) pair returned by process_video.
video_input = gr.Video(label="Upload Video")
transcription_output = gr.Textbox(label="Transcription")
summary_output = gr.Textbox(label="Summarization")

iface = gr.Interface(
    fn=process_video,
    inputs=video_input,
    outputs=[transcription_output, summary_output],
    title="Video Transcription and Summarization",
    description="Upload a video to extract audio, transcribe it to text, and summarize the content.",
)

# Start the web server for the app.
iface.launch()
|