Spaces:
Runtime error
Runtime error
File size: 6,638 Bytes
90ddd8e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
import subprocess
import sys

# Install the runtime dependencies before the third-party imports below.
# pip is invoked as ``<this interpreter> -m pip`` so the packages land in the
# environment that is actually running this script; a bare ``pip`` found on
# PATH may belong to a different Python installation.
# The packages are installed one call at a time, in this order, exactly as
# before: merging them into a single command would make pip resolve the
# ``imageio==2.4.1`` pin together with moviepy's own imageio requirement.
for requirement in (
    'wordcloud',
    'git+https://github.com/openai/whisper.git',
    'transformers',
    'imageio==2.4.1',
    'moviepy',
    'keybert',
    'pytube',
):
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', requirement])
import streamlit as st
import os
from wordcloud import WordCloud
from keybert import KeyBERT
import pandas as pd
import matplotlib.pyplot as plt
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
from moviepy.editor import *
from tqdm import tqdm
import os
import math
import nltk
nltk.download('punkt')
import whisper
from transformers import pipeline
from pytube import YouTube
def process_video(path):
    """Download a YouTube video, transcribe it minute by minute and summarise it.

    The video is saved as ``meeting.mp4``, its audio is cut into 60-second MP3
    chunks, each chunk is transcribed with the Whisper "base" model, and every
    transcription is summarised and sentiment-labelled with the default
    ``transformers`` pipelines. The per-chunk results are written to
    ``output2.csv`` with the columns ``transcriptions``, ``summary`` and
    ``sentiment``.

    Args:
        path: URL of the YouTube video to process.

    Returns:
        A tuple ``(tot_text, tot_summary, tot_keywords)``: the concatenated
        transcription, a summary built from 500-word chunks of it, and a list
        of up to 50 keywords extracted with KeyBERT.

    Raises:
        ValueError: if pytube offers no stream to download for the video.
    """
    whisper_model = whisper.load_model("base")
    time_range = 60  # length of one audio chunk, in seconds

    def SpeechToTextEng(aud_path):
        """Return the Whisper transcription of the audio file at *aud_path*."""
        return whisper_model.transcribe(aud_path)["text"]

    def run_range(duration):
        """Return how many ``time_range``-second chunks cover *duration* seconds."""
        return math.ceil(duration / time_range)

    def export_chunks(source, extract_audio):
        """Write *source* as consecutive MP3 chunks; return the paths written."""
        duration = source.duration
        written = []
        for i in range(run_range(duration)):
            left = i * time_range
            # Clamp the final chunk so it never reaches past the end of the clip.
            right = min(left + time_range, duration)
            chunk_path = "vid_to_aud" + str(i) + ".mp3"
            try:
                extract_audio(source.subclip(left, right)).write_audiofile(chunk_path)
            except Exception as exc:
                # Best effort, as before: a chunk that cannot be written is
                # skipped, but the failure is reported instead of being hidden.
                print(f"Skipping chunk {i}: could not write {chunk_path}: {exc}")
                continue
            written.append(chunk_path)
        return written

    def audio_generator(path, aud=0, vid=0):
        """Split the media file at *path* into MP3 chunks.

        ``vid=1`` treats *path* as a video file, ``aud=1`` as an audio file.
        Returns the list of chunk files that were actually written, so the
        caller knows what to transcribe (previously the chunk count was
        assigned to a local variable and never reached the caller).
        """
        chunk_paths = []
        if vid == 1:
            clip = VideoFileClip(path)
            try:
                chunk_paths += export_chunks(clip, lambda part: part.audio)
            finally:
                clip.close()  # release the underlying reader processes
        if aud == 1:
            audio_clip = AudioFileClip(path)
            try:
                chunk_paths += export_chunks(audio_clip, lambda part: part)
            finally:
                audio_clip.close()
        # Both modes write to the same file names; keep each path only once.
        return list(dict.fromkeys(chunk_paths))

    # Download the lowest-resolution stream: only the audio track is used.
    yt = YouTube(path)
    stream = yt.streams.get_lowest_resolution()
    if stream is None:
        raise ValueError("No downloadable stream found for " + str(path))
    stream.download(filename='meeting.mp4')

    chunk_paths = audio_generator("./meeting.mp4", vid=1)

    # Transcribe every chunk and delete its file, even if transcription fails.
    transcribed_lit = []
    for chunk_path in tqdm(chunk_paths):
        try:
            transcribed_lit.append(SpeechToTextEng(chunk_path))
        finally:
            os.remove(chunk_path)

    data = pd.DataFrame({'transcriptions': transcribed_lit})

    summarizer = pipeline("summarization")
    sentiment_analyzer = pipeline("sentiment-analysis")

    sumarized_lit = []
    sentiment_lit = []
    for transcription in tqdm(transcribed_lit):
        sumarized_lit.append(
            summarizer(transcription, min_length=75, max_length=300)[0]['summary_text']
        )
        sentiment_lit.append(sentiment_analyzer(transcription)[0]['label'])
    data['summary'] = sumarized_lit
    data['sentiment'] = sentiment_lit
    data.to_csv('output2.csv', index=False)

    tot_text = "".join(transcribed_lit)

    key_model = KeyBERT('distilbert-base-nli-mean-tokens')

    def extract_keywords(text, top_n=50):
        """Return up to *top_n* keywords of *text* (none for blank text)."""
        if not text.strip():
            return []
        return [keyword[0] for keyword in key_model.extract_keywords(text, top_n=top_n)]

    tot_keywords = extract_keywords(tot_text)

    def summarize_text(text):
        """Summarise *text* in 500-word chunks and concatenate the summaries."""
        chunk_size = 500  # number of words per chunk
        words = text.split()
        # Ceiling division: no empty trailing chunk when the word count is
        # zero or an exact multiple of chunk_size.
        num_chunks = math.ceil(len(words) / chunk_size)
        total_summary = ""
        for i in tqdm(range(num_chunks)):
            start_index = i * chunk_size
            chunk = " ".join(words[start_index:start_index + chunk_size])
            total_summary += summarizer(chunk, min_length=75, max_length=200)[0]['summary_text']
        return total_summary

    tot_summary = summarize_text(tot_text)
    return tot_text, tot_summary, tot_keywords
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
def generate_word_cloud(text):
    """Render a word cloud of *text* in the Streamlit page.

    Args:
        text: the text whose words are drawn.

    If *text* contains no usable words, a short message is shown instead of
    letting the ``ValueError`` raised by ``WordCloud.generate`` crash the page.
    """
    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    except ValueError:
        st.write("Not enough text to build a word cloud.")
        return

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # Plot the word cloud on the axis, without ticks or frame.
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; close it so
        # figures do not accumulate across Streamlit reruns.
        plt.close(fig)
def main():
    """Streamlit entry point: ask for a YouTube link, process it, show the results.

    After ``process_video`` has run, the per-chunk results are read back from
    ``output2.csv`` (columns ``transcriptions``, ``summary``, ``sentiment``)
    and displayed together with the overall text, summary and keywords.
    """
    st.title("Meeting Summary Web App")

    # YouTube link input
    youtube_url = st.text_input("Enter the YouTube video link")

    if not st.button("Process Video"):
        return
    if not youtube_url:
        st.warning("Please enter a YouTube video link.")
        return

    # Process the YouTube video
    tot_text, tot_summary, tot_keywords = process_video(youtube_url)

    # Display the output
    if not os.path.exists("output2.csv"):
        st.write("No output file found.")
        return

    output_df = pd.read_csv("output2.csv")
    st.subheader("Transcriptions:")
    st.write(output_df["transcriptions"])
    st.subheader("Labels:")
    # The CSV has no "labels" column; the per-chunk labels are stored under
    # "sentiment".
    st.write(output_df["sentiment"])
    st.subheader("Word Cloud:")
    # Empty transcriptions are read back as NaN; normalise to strings so the
    # .str accessor works even when every row is empty.
    generate_word_cloud(
        output_df["transcriptions"].fillna("").astype(str).str.cat(sep=' ')
    )
    st.subheader("tot_text:")
    st.write(tot_text)
    st.subheader("tot_summary:")
    st.write(tot_summary)
    st.subheader("tot_keywords:")
    st.write(tot_keywords)
# Run the Streamlit app only when this file is executed as a script.
if __name__ == "__main__":
    main()