# (extraction residue, preserved as comments — not part of the program)
# Spaces:
# Runtime error
# Runtime error
import subprocess | |
# # Run the pip install command | |
subprocess.check_call(['pip', 'install', 'wordcloud']) | |
subprocess.check_call(['pip', 'install', 'git+https://github.com/openai/whisper.git']) | |
subprocess.check_call(['pip', 'install', 'transformers']) | |
subprocess.check_call(['pip', 'install', 'imageio==2.4.1']) | |
subprocess.check_call(['pip', 'install', 'moviepy']) | |
subprocess.check_call(['pip', 'install', 'keybert']) | |
subprocess.check_call(['pip', 'install', 'pytube']) | |
import streamlit as st | |
import os | |
from wordcloud import WordCloud | |
from keybert import KeyBERT | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
# ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
from moviepy.editor import * | |
from tqdm import tqdm | |
import os | |
import math | |
import nltk | |
nltk.download('punkt') | |
import whisper | |
from transformers import pipeline | |
from pytube import YouTube | |
def process_video(path): | |
whisper_model = whisper.load_model("base") | |
def SpeechToTextEng(aud_path): | |
result = whisper_model.transcribe(aud_path) | |
return result["text"] | |
def run_range(duration): | |
time=duration/60 | |
floor=math.ceil(time) | |
return floor | |
time_range=60 | |
clip_run_range=0 | |
clip_duration=0 | |
def audio_generator(path,aud=0,vid=0): | |
if vid==1: | |
clip=VideoFileClip(path) | |
clip_duration = clip.duration | |
clip_run_range=run_range(clip_duration) | |
for i in range(clip_run_range): | |
left=i*time_range | |
right=left+time_range | |
# print(left,right) | |
crop_clip=clip.subclip(left,right) | |
try: | |
crop_clip.audio.write_audiofile("vid_to_aud"+str(i)+".mp3") | |
except: | |
pass | |
if aud==1: | |
audio_clip=AudioFileClip(path) | |
clip_duration = audio_clip.duration | |
print(clip_duration) | |
clip_run_range=run_range(clip_duration) | |
print(clip_run_range) | |
for i in range(clip_run_range): | |
left=i*time_range | |
right=left+time_range | |
# print(left,right) | |
crop_clip=audio_clip.subclip(left,right) | |
try: | |
crop_clip.write_audiofile("vid_to_aud"+str(i)+".mp3") | |
except: | |
pass | |
# YouTube video URL | |
video_url = path | |
# Create a YouTube object | |
yt = YouTube(video_url) | |
# Get the highest resolution video stream | |
stream = yt.streams.get_lowest_resolution() | |
# Download the video | |
stream.download(filename='meeting.mp4') | |
audio_generator("./meeting.mp4",vid=1) | |
transcribed_lit=[] | |
label_lit=[] | |
translated_lit=[] | |
for i in tqdm(range(clip_run_range)): | |
transcribed=SpeechToTextEng("./vid_to_aud"+str(i)+".mp3") | |
transcribed_lit.append(transcribed) | |
os.remove("./vid_to_aud"+str(i)+".mp3") | |
data = pd.DataFrame( | |
{'transcriptions': transcribed_lit | |
}) | |
summarizer = pipeline("summarization") | |
sentiment_analyzer = pipeline("sentiment-analysis") | |
sumarized_lit=[] | |
sentiment_lit=[] | |
for i in tqdm(range(len(data))): | |
summarized=summarizer(data.iloc[i,0],min_length=75, max_length=300)[0]['summary_text'] | |
sentiment = sentiment_analyzer(data.iloc[i,0])[0]['label'] | |
sumarized_lit.append(summarized) | |
sentiment_lit.append(sentiment) | |
data['summary']=sumarized_lit | |
data['sentiment']=sentiment_lit | |
data.to_csv('output2.csv', index=False) | |
tot_text="" | |
for i in range(len(data)): | |
tot_text=tot_text+data.iloc[i,0] | |
key_model = KeyBERT('distilbert-base-nli-mean-tokens') | |
def extract_keywords(text, top_n=50): | |
keywords = key_model.extract_keywords(text, top_n=top_n) | |
return [keyword[0] for keyword in keywords] | |
tot_keywords=extract_keywords(tot_text) | |
def get_500_words(text,left,right): | |
words = text.split() | |
first_500_words = ' '.join(words[left:right]) | |
return first_500_words | |
def summarize_text(text): | |
chunk_size = 500 # Number of words per chunk | |
total_summary = "" # Total summary | |
words = text.split() # Split the text into individual words | |
num_chunks = len(words) // chunk_size + 1 # Calculate the number of chunks | |
for i in tqdm(range(num_chunks)): | |
start_index = i * chunk_size | |
end_index = start_index + chunk_size | |
chunk = " ".join(words[start_index:end_index]) | |
# Pass the chunk to the summarizer (replace with your summarization code) | |
chunk_summary = summarizer(chunk,min_length=75, max_length=200)[0]['summary_text'] | |
# print(chunk_summary) | |
total_summary += chunk_summary | |
return total_summary | |
tot_summary=summarize_text(tot_text) | |
return tot_text,tot_summary,tot_keywords | |
# ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
def generate_word_cloud(text): | |
# Create a WordCloud object | |
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text) | |
# Display the generated word cloud | |
fig, ax = plt.subplots(figsize=(10, 5)) | |
# Plot the word cloud on the axis | |
ax.imshow(wordcloud, interpolation='bilinear') | |
ax.axis('off') | |
st.pyplot(fig) | |
def main(): | |
st.title("Meeting Summary Web App") | |
# YouTube link input | |
youtube_url = st.text_input("Enter the YouTube video link") | |
if st.button("Process Video"): | |
if youtube_url: | |
# Process the YouTube video | |
tot_text, tot_summary, tot_keywords = process_video(youtube_url) | |
# Display the output | |
if os.path.exists("output2.csv"): | |
output_df = pd.read_csv("output2.csv") | |
st.subheader("Transcriptions:") | |
st.write(output_df["transcriptions"]) | |
st.subheader("Labels:") | |
st.write(output_df["labels"]) | |
st.subheader("Word Cloud:") | |
generate_word_cloud(output_df["transcriptions"].str.cat(sep=' ')) | |
st.subheader("tot_text:") | |
st.write(tot_text) | |
st.subheader("tot_summary:") | |
st.write(tot_summary) | |
st.subheader("tot_keywords:") | |
st.write(tot_keywords) | |
else: | |
st.write("No output file found.") | |
if __name__ == "__main__": | |
main() |