JaganathC committed on
Commit
9bce705
·
verified ·
1 Parent(s): dfc8679

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from moviepy.video.io.VideoFileClip import VideoFileClip
4
+ from pydub import AudioSegment
5
+ import whisper
6
+ from transformers import pipeline, MarianMTModel, MarianTokenizer
7
+ import yt_dlp as youtube_dl
8
+
9
+
10
# App Configuration
st.set_page_config(page_title="Video-to-Text Summarization", layout="centered")

# Header
st.title("🎥 Video-to-Text Summarization App")
st.markdown("""
This app helps you:
- Convert videos into text and summarize them.
- Extract multilingual transcriptions and translations.
- Process videos with multiple speakers.
""")

# Path of the video currently being processed; kept in session state so it
# survives Streamlit's script reruns.
if "video_path" not in st.session_state:
    st.session_state.video_path = None

# 1. Upload Video Section
st.header("Upload Your Video")

# Choose upload option
upload_option = st.selectbox("Select Upload Method", ["Local", "YouTube URL"])

# Upload Local File
if upload_option == "Local":
    video_file = st.file_uploader("Upload your video file", type=["mp4", "mkv", "avi"])
    if video_file:
        # getvalue() returns the whole buffer regardless of the current read
        # position, so the saved file is never accidentally empty.
        with open("uploaded_video.mp4", "wb") as f:
            f.write(video_file.getvalue())
        st.session_state.video_path = "uploaded_video.mp4"
        st.success("Video uploaded successfully!")

# Download Video from YouTube
elif upload_option == "YouTube URL":
    youtube_url = st.text_input("Enter YouTube URL")
    if youtube_url:
        # Only download when the URL changed: Streamlit reruns this script on
        # every widget interaction and the download is expensive.
        if st.session_state.get("downloaded_url") != youtube_url:
            try:
                # Use the yt-dlp Python API instead of a shell command: the URL
                # is untrusted user input and must never reach a shell, and the
                # API raises on failure so the except branch is meaningful.
                # "overwrites" replaces a stale video.mp4 from an earlier URL.
                ydl_opts = {"outtmpl": "video.mp4", "overwrites": True}
                with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([youtube_url])
            except Exception as e:  # UI boundary: report any failure to the user
                st.error(f"Error downloading video: {str(e)}")
            else:
                st.session_state.downloaded_url = youtube_url

        if st.session_state.get("downloaded_url") == youtube_url:
            st.session_state.video_path = "video.mp4"
            st.success("YouTube video downloaded successfully!")
51
+
52
# 2. Process Video Section (After Upload)
if st.session_state.video_path:
    st.header("Process Your Video")
    st.write(f"Processing {st.session_state.video_path}...")

    # Extract Audio from Video
    def extract_audio(video_path):
        """Export the audio track of *video_path* to ``extracted_audio.mp3``.

        Returns the path of the exported MP3, or ``None`` (after showing an
        error in the UI) if decoding or exporting fails.
        """
        try:
            audio = AudioSegment.from_file(video_path)
            audio.export("extracted_audio.mp3", format="mp3")
        except Exception as e:  # UI boundary: surface the failure, don't crash
            st.error(f"Error in extracting audio: {str(e)}")
            return None
        st.success("Audio extracted successfully!")
        return "extracted_audio.mp3"

    audio_path = extract_audio(st.session_state.video_path)

    # Real-time Audio Transcription
    def transcribe_audio(audio_path):
        """Transcribe *audio_path* with the Whisper ``base`` model.

        Shows the transcript in a text area and returns it, or returns
        ``None`` (after showing an error in the UI) on failure.
        """
        # NOTE(review): the model is reloaded on every Streamlit rerun;
        # consider caching it if the installed Streamlit version supports it.
        try:
            model = whisper.load_model("base")
            result = model.transcribe(audio_path)
        except Exception as e:  # UI boundary: surface the failure, don't crash
            st.error(f"Error in transcription: {str(e)}")
            return None
        st.text_area("Transcription", result['text'], height=200)
        return result['text']

    # Always bind the name so later code can test the value itself instead of
    # probing locals(); None means "no transcript available".
    transcription = transcribe_audio(audio_path) if audio_path else None

    # 3. Summarize and Translate
    # Skip this section when transcription failed or produced no text;
    # otherwise the summarizer would be fed None / an empty string.
    if transcription and transcription.strip():
        st.header("Results")

        # Summarize Text
        def summarize_text(text):
            """Summarize *text* with ``facebook/bart-large-cnn``.

            Shows the summary in a text area and returns it, or returns
            ``None`` (after showing an error in the UI) on failure.
            """
            try:
                summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
                # truncation=True keeps long transcripts within the model's
                # maximum input length instead of failing inside the model.
                summary = summarizer(
                    text,
                    max_length=150,
                    min_length=30,
                    do_sample=False,
                    truncation=True,
                )
            except Exception as e:  # UI boundary: surface the failure
                st.error(f"Error in summarization: {str(e)}")
                return None
            st.text_area("Summary", summary[0]['summary_text'], height=150)
            return summary[0]['summary_text']

        summary = summarize_text(transcription)

        # Translate Text
        def translate_text(text, src_lang="en", tgt_lang="es"):
            """Translate *text* from *src_lang* to *tgt_lang* with a MarianMT model.

            The model name is ``Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}``.
            Shows the translation in a text area and returns it, or returns
            ``None`` (after showing an error in the UI) on failure.
            """
            try:
                model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
                tokenizer = MarianTokenizer.from_pretrained(model_name)
                model = MarianMTModel.from_pretrained(model_name)
                # truncation=True guards against inputs longer than the
                # model's maximum sequence length.
                encoded = tokenizer(
                    text, return_tensors="pt", padding=True, truncation=True
                )
                translated = model.generate(**encoded)
                translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
            except Exception as e:  # UI boundary: surface the failure
                st.error(f"Error in translation: {str(e)}")
                return None
            st.text_area("Translated Summary", translated_text, height=150)
            return translated_text

        target_language = st.selectbox("Select Translation Language", ["es", "fr", "de", "zh"])
        # Only translate when summarization actually produced text.
        if target_language and summary:
            translated_summary = translate_text(summary, tgt_lang=target_language)

else:
    st.info("Please upload a video to start the process.")