File size: 8,420 Bytes
ee377d8
efa4923
 
 
 
 
 
0064167
efa4923
 
972a238
 
 
0064167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972a238
 
 
 
 
 
 
 
 
efa4923
972a238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee377d8
972a238
 
 
 
 
 
 
 
 
ee377d8
 
972a238
 
 
 
 
ee377d8
972a238
efa4923
972a238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efa4923
 
972a238
0064167
 
efa4923
972a238
efa4923
972a238
0064167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972a238
 
0064167
972a238
 
 
0064167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972a238
 
0064167
972a238
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import streamlit as st
import moviepy.editor as mp
import speech_recognition as sr
from pydub import AudioSegment
import tempfile
import os
import io
import requests
from transformers import pipeline
import matplotlib.pyplot as plt
import librosa
import numpy as np

# Function to download file from URL
def download_file(url):
    """Download ``url`` into a temporary file and return that file's path.

    The response body is streamed to disk in 8 KiB chunks, so the whole file
    never has to be held in memory. The temp file's suffix is derived from the
    *path* component of the URL only, so a query string or fragment
    (``clip.mp4?token=abc``) does not end up in the extension.

    Args:
        url: Address of the file to fetch.

    Returns:
        The path of the downloaded temporary file (the caller is responsible
        for deleting it), or ``None`` when the download failed. Failures are
        reported to the user through ``st.error``.
    """
    from urllib.parse import urlparse

    temp_path = None
    try:
        extension = os.path.splitext(urlparse(url).path)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
            temp_path = temp_file.name
            # (connect, read) timeouts: without them a stalled server would
            # hang the app forever. The read timeout applies per socket read,
            # not to the whole transfer, so large files still download.
            with requests.get(url, stream=True, timeout=(10, 60)) as r:
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=8192):
                    temp_file.write(chunk)
        return temp_path
    except Exception as e:
        # The temp file was created with delete=False, so a failed or partial
        # download must be removed explicitly or it would leak on disk.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass  # best-effort cleanup; the download error is what matters
        st.error(f"Failed to download file: {e}")
        return None

# Function to convert video to audio
def video_to_audio(video_file):
    """Extract the audio track of a video into a temporary MP3 file.

    Args:
        video_file: Path of the video file to read.

    Returns:
        Path of the newly written ``.mp3`` temporary file. The caller is
        responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = mp.VideoFileClip(video_file)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError("The video file does not contain an audio track.")
        # mkstemp creates the file atomically; the deprecated mktemp only
        # returns a name, which another process could claim first.
        fd, temp_audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        try:
            audio.write_audiofile(temp_audio_path)
        except Exception:
            # Do not leave an empty/partial temp file behind on failure.
            os.remove(temp_audio_path)
            raise
    finally:
        # Release the reader resources held by the clip.
        video.close()
    return temp_audio_path

# Function to convert MP3 to WAV
def convert_mp3_to_wav(mp3_file):
    """Convert an MP3 file into a temporary WAV file.

    Args:
        mp3_file: Path of the MP3 file to convert.

    Returns:
        Path of the newly written ``.wav`` temporary file. The caller is
        responsible for deleting it.
    """
    audio = AudioSegment.from_mp3(mp3_file)
    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim first.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio.export(temp_wav_path, format="wav")
    except Exception:
        # Do not leave an empty/partial temp file behind on failure.
        os.remove(temp_wav_path)
        raise
    return temp_wav_path

# Function to transcribe audio with chunking for large files
def transcribe_audio(audio_file, chunk_length=60):
    """Transcribe a WAV file with Google Speech Recognition.

    Audio no longer than ``chunk_length`` seconds is sent in one request.
    Longer audio is split into consecutive ``chunk_length``-second pieces that
    are recognised one by one and joined with single spaces.

    Args:
        audio_file: Path of the WAV file to transcribe.
        chunk_length: Maximum length, in seconds, of each piece sent to the
            recogniser. Defaults to 60.

    Returns:
        The transcription text. Recognition failures do not raise: for short
        audio the failure message is returned in place of the text, and for
        chunked audio a bracketed placeholder takes the failed chunk's place.

    Raises:
        ValueError: If ``chunk_length`` is not a positive duration.
    """
    chunk_ms = int(round(chunk_length * 1000))
    if chunk_ms <= 0:
        raise ValueError("chunk_length must be a positive number of seconds.")

    audio = AudioSegment.from_wav(audio_file)
    total_ms = len(audio)  # pydub segment length is in milliseconds
    recognizer = sr.Recognizer()

    if total_ms <= chunk_ms:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
        try:
            return recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            return "Audio could not be understood."
        except sr.RequestError:
            return "Could not request results from Google Speech Recognition service."

    # sr.AudioData takes only raw bytes, a sample rate and a sample width --
    # there is no channel count -- so interleaved multi-channel PCM must be
    # downmixed to mono before its raw bytes are handed over.
    mono = audio.set_channels(1)
    transcriptions = []
    # Stepping over the real length never yields an empty trailing chunk,
    # which "duration // chunk_length + 1" did for exact multiples.
    for start_ms in range(0, total_ms, chunk_ms):
        chunk = mono[start_ms:start_ms + chunk_ms]
        audio_data = sr.AudioData(chunk.raw_data, mono.frame_rate, mono.sample_width)
        try:
            transcriptions.append(recognizer.recognize_google(audio_data))
        except sr.UnknownValueError:
            transcriptions.append("[Audio could not be understood.]")
        except sr.RequestError:
            transcriptions.append("[Could not request results.]")
    return " ".join(transcriptions)

# Function to detect emotions
def detect_emotion(text):
    """Score ``text`` against every emotion label of the classifier.

    Args:
        text: The text to classify (here, a transcription).

    Returns:
        A dict mapping each emotion label to its score.
    """
    emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
    # Transcriptions can be far longer than the model accepts; without
    # truncation an over-long input makes the model raise. 512 tokens is the
    # input limit of RoBERTa-family models such as this one.
    result = emotion_pipeline(text, truncation=True, max_length=512)
    # For a single input string the pipeline returns a one-element list whose
    # item is the list of {label, score} dicts.
    return {entry['label']: entry['score'] for entry in result[0]}

# Function to plot audio waveform
def plot_waveform(audio_data, duration=10):
    """Render the waveform of the start of an audio stream in the Streamlit app.

    Args:
        audio_data: Seekable file-like object containing the audio; it is
            rewound to the beginning before being read.
        duration: Number of seconds, from the start, to load and plot.
    """
    audio_data.seek(0)
    # sr=None keeps the file's native sample rate instead of resampling.
    # The local is named sample_rate so it does not shadow the module-level
    # ``sr`` alias used for speech_recognition.
    samples, sample_rate = librosa.load(audio_data, sr=None, duration=duration)
    time = np.linspace(0, len(samples) / sample_rate, len(samples))

    # Draw on an explicit figure rather than pyplot's global state, and close
    # it afterwards so figures do not accumulate across Streamlit reruns.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(time, samples)
        ax.set_title(f"Audio Waveform (first {duration} seconds)")
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("Amplitude")
        st.pyplot(fig)
    finally:
        plt.close(fig)

# Streamlit app layout
#
# Streamlit re-executes this whole script on every widget interaction, so
# anything that must outlive the "Analyze" click (the converted audio AND the
# transcription) is kept in st.session_state rather than in plain variables.
st.title("Video and Audio to Text Transcription with Emotion Detection and Visualization")
st.write("Upload a video or audio file, or provide a URL to a large file (up to 1GB).")
st.write("**Note:** Direct file uploads are limited to 200MB. For larger files, please provide a URL.")


def _save_upload(uploaded, default_suffix):
    """Write an uploaded file to a temp file that keeps its real extension."""
    suffix = os.path.splitext(uploaded.name)[1].lower() or default_suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        tmp_file.write(uploaded.read())
        return tmp_file.name


def _remove_temp_files(paths):
    """Best-effort removal of every distinct temp file in ``paths``."""
    for path in set(paths):
        try:
            os.remove(path)
        except OSError:
            # Cleanup must never mask the processing result or its error.
            pass


def _analyze_wav(wav_path, audio_key, text_key):
    """Transcribe, classify and plot ``wav_path``, storing results in session state."""
    transcription = transcribe_audio(wav_path)
    st.text_area("Transcription", transcription, height=300)
    emotions = detect_emotion(transcription)
    st.write(f"Detected Emotions: {emotions}")
    with open(wav_path, "rb") as f:
        st.session_state[audio_key] = io.BytesIO(f.read())
    # Stored next to the audio so the download buttons still work on reruns
    # where the Analyze button is not pressed.
    st.session_state[text_key] = transcription
    plot_waveform(st.session_state[audio_key])


tab = st.selectbox("Select file type", ["Video", "Audio"])

if tab == "Video":
    method = st.radio("Choose how to provide the video file:", ["Upload file", "Provide URL"])
    # Pre-bind both inputs so neither name is undefined for the other method.
    uploaded_file, url = None, ""
    if method == "Upload file":
        uploaded_file = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])
    elif method == "Provide URL":
        url = st.text_input("Enter video URL")
    if st.button("Analyze Video"):
        if method == "Upload file" and uploaded_file:
            file_path = _save_upload(uploaded_file, ".mp4")
        elif method == "Provide URL" and url:
            with st.spinner("Downloading video... This may take a while for large files."):
                file_path = download_file(url)
            if file_path is None:
                st.error("Failed to download the file. Please check the URL and try again.")
                st.stop()
        else:
            st.error("Please provide a file or URL.")
            st.stop()
        # Process the video file; temp files are removed even if a step fails.
        temp_paths = [file_path]
        try:
            with st.spinner("Processing video..."):
                audio_file = video_to_audio(file_path)
                temp_paths.append(audio_file)
                wav_audio_file = convert_mp3_to_wav(audio_file)
                temp_paths.append(wav_audio_file)
                _analyze_wav(wav_audio_file, "wav_audio_file", "transcription")
        finally:
            _remove_temp_files(temp_paths)
    if 'wav_audio_file' in st.session_state and 'transcription' in st.session_state:
        st.audio(st.session_state.wav_audio_file, format='audio/wav')
        st.download_button("Download Transcription", st.session_state.transcription, "transcription.txt", "text/plain")
        st.download_button("Download Audio", st.session_state.wav_audio_file, "converted_audio.wav", "audio/wav")

elif tab == "Audio":
    method = st.radio("Choose how to provide the audio file:", ["Upload file", "Provide URL"])
    # Pre-bind both inputs so neither name is undefined for the other method.
    uploaded_file, url = None, ""
    if method == "Upload file":
        uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])
    elif method == "Provide URL":
        url = st.text_input("Enter audio URL")
    if st.button("Analyze Audio"):
        if method == "Upload file" and uploaded_file:
            # The file name's extension is used instead of the browser-reported
            # MIME type, which is not reliable for MP3 uploads.
            file_path = _save_upload(uploaded_file, ".wav")
        elif method == "Provide URL" and url:
            with st.spinner("Downloading audio... This may take a while for large files."):
                file_path = download_file(url)
            if file_path is None:
                st.error("Failed to download the file. Please check the URL and try again.")
                st.stop()
        else:
            st.error("Please provide a file or URL.")
            st.stop()
        # Process the audio file; temp files are removed even if a step fails.
        temp_paths = [file_path]
        try:
            with st.spinner("Processing audio..."):
                if file_path.lower().endswith('.mp3'):
                    wav_audio_file = convert_mp3_to_wav(file_path)
                    temp_paths.append(wav_audio_file)
                else:
                    wav_audio_file = file_path
                _analyze_wav(wav_audio_file, "wav_audio_file_audio", "transcription_audio")
        finally:
            _remove_temp_files(temp_paths)
    if 'wav_audio_file_audio' in st.session_state and 'transcription_audio' in st.session_state:
        st.audio(st.session_state.wav_audio_file_audio, format='audio/wav')
        st.download_button("Download Transcription", st.session_state.transcription_audio, "transcription_audio.txt", "text/plain")
        st.download_button("Download Audio", st.session_state.wav_audio_file_audio, "converted_audio_audio.wav", "audio/wav")