import os
import getpass

import gradio as gr
import numpy as np
import pixeltable as pxt
from pixeltable.iterators import FrameIterator, StringSplitter
from pixeltable.functions.video import extract_audio
from pixeltable.functions.audio import get_metadata
from pixeltable.functions import openai
from pixeltable.functions.huggingface import sentence_transformer

# Store OpenAI API key
if 'OPENAI_API_KEY' not in os.environ:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')

MAX_VIDEO_SIZE_MB = 35


def process_video(video_file, progress=gr.Progress()):
    progress(0, desc="Initializing...")
    try:
        # Validate the upload before doing any setup work
        if not video_file:
            return "Please upload a video file.", "", "No file provided"

        # Check the video file size
        video_size = os.path.getsize(video_file) / (1024 * 1024)  # Convert to MB
        if video_size > MAX_VIDEO_SIZE_MB:
            return (
                f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. Please upload a smaller file.",
                "",
                "File too large",
            )

        # Create a table, a view, and computed columns
        pxt.drop_dir('gong_demo', force=True)
        pxt.create_dir('gong_demo')

        calls_table = pxt.create_table(
            'gong_demo.calls',
            {"video": pxt.VideoType(nullable=True)}
        )

        # Computed columns store transformations and persist outputs
        calls_table['audio'] = extract_audio(calls_table.video, format='mp3')
        calls_table['metadata'] = get_metadata(calls_table.audio)
        calls_table['transcription'] = openai.transcriptions(audio=calls_table.audio, model='whisper-1')
        calls_table['transcription_text'] = calls_table.transcription.text.astype(pxt.StringType())

        # Split the transcription into sentences for fine-grained search
        sentences_view = pxt.create_view(
            'gong_demo.sentences',
            calls_table,
            iterator=StringSplitter.create(
                text=calls_table.transcription_text,
                separators='sentence'
            )
        )

        @pxt.expr_udf
        def e5_embed(text: str) -> np.ndarray:
            return sentence_transformer(text, model_id='intfloat/e5-large-v2')

        sentences_view.add_embedding_index('text', string_embed=e5_embed)

        progress(0.2, desc="Creating UDFs...")

        # Custom user-defined function (UDF) for generating insights
        @pxt.udf
        def generate_insights(transcription: str) -> list[dict]:
            system_msg = (
                'You are an AI assistant that analyzes call transcriptions. '
                'Analyze the following call transcription and provide insights on: '
                '1. Main topics discussed 2. Action items 3. Sentiment analysis 4. Key questions asked'
            )
            user_msg = f'Transcription: "{transcription}"'
            return [
                {'role': 'system', 'content': system_msg},
                {'role': 'user', 'content': user_msg}
            ]

        # Apply the UDF to create a new column
        calls_table['insights_prompt'] = generate_insights(calls_table.transcription_text)

        progress(0.4, desc="Generating insights...")

        # Generate insights using OpenAI's chat completion API
        calls_table['insights_response'] = openai.chat_completions(
            messages=calls_table.insights_prompt,
            model='gpt-3.5-turbo',
            max_tokens=500
        )

        # Extract the content of the response
        calls_table['insights'] = calls_table.insights_response.choices[0].message.content

        progress(0.6, desc="Processing video...")

        # Insert the video into the table; computed columns run automatically
        calls_table.insert([{"video": video_file}])

        progress(0.8, desc="Retrieving results...")

        # Retrieve the transcription and insights
        result = calls_table.select(calls_table.transcription_text, calls_table.insights).tail(1)
        transcription = result['transcription_text'][0]
        insights = result['insights'][0]

        progress(1.0, desc="Processing complete")
        return transcription, insights, "Processing complete"
    except Exception as e:
        return f"An error occurred during video processing: {str(e)}", "", "Error"


# Perform similarity search over the sentence-level embedding index
def similarity_search(query, num_results, progress=gr.Progress()):
    sentences_view = pxt.get_table('gong_demo.sentences')
    progress(0.5, desc="Performing search...")

    sim = sentences_view.text.similarity(query)
    results = (
        sentences_view.order_by(sim, asc=False)
        .limit(num_results)
        .select(sentences_view.text, sim=sim)
        .collect()
        .to_pandas()
    )
    # Rename columns so they match the headers declared on the DataFrame component
    results.columns = ['Relevant Text', 'Similarity Score']

    progress(1.0, desc="Search complete")
    return results


def chatbot_response(message, chat_history):
    @pxt.udf
    def create_chatbot_prompt(context: str, question: str) -> list[dict]:
        system_message = (
            "You are an AI assistant that answers questions about a call based on the provided context. "
            "If the answer cannot be found in the context, say that you don't know."
        )
        user_message = f"Context:\n{context}\n\nQuestion: {question}"
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message}
        ]

    try:
        sentences_view = pxt.get_table('gong_demo.sentences')

        # Perform similarity search to get relevant context
        sim = sentences_view.text.similarity(message)
        context = sentences_view.order_by(sim, asc=False).limit(5).select(sentences_view.text, sim=sim).collect()

        # Prepare the context for the prompt
        context_text = "\n".join([row['text'] for row in context])

        # Create a temporary table for the chatbot interaction
        temp_table = pxt.create_table('gong_demo.temp_chatbot', {'question': pxt.StringType()})
        try:
            temp_table.insert([{'question': message}])

            # Create computed columns for the prompt and response
            temp_table['chatbot_prompt'] = create_chatbot_prompt(context_text, temp_table.question)
            temp_table['chatbot_response'] = openai.chat_completions(
                messages=temp_table.chatbot_prompt,
                model='gpt-3.5-turbo',
                max_tokens=150
            )
            temp_table['answer'] = temp_table.chatbot_response.choices[0].message.content

            answer = temp_table.select(temp_table.answer).collect()['answer'][0]
        finally:
            # Clean up the temporary table even if the request fails
            pxt.drop_table('gong_demo.temp_chatbot', force=True)

        chat_history.append((message, answer))
        return "", chat_history  # Return both expected outputs
    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        chat_history.append((message, error_message))
        return "", chat_history  # Return both expected outputs
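
# A minimal usage sketch (hypothetical values, outside the Gradio UI): once a
# call has been processed, the helpers above can also be invoked directly, e.g.
#
#   results = similarity_search("pricing discussion", num_results=3)
#   _, history = chatbot_response("What were the action items?", chat_history=[])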

# Gradio interface
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown(
        """
        # Pixeltable

        ## Call Analysis AI Tool
        """
    )

    gr.HTML(
        """
        <p>
            Pixeltable is a declarative interface for working with text, images, embeddings,
            and even video, enabling you to store, transform, index, and iterate on data.
        </p>
""" ) with gr.Row(): with gr.Column(): with gr.Accordion("🎯 What does it do?", open=False): gr.Markdown(""" - 🎙️ Transcribes call audio to text - 💡 Generates insights and key points - 🔍 Enables content-based similarity search - 🤖 Provides an AI chatbot for in-depth analysis - 📊 Offers summaries of call data """) with gr.Column(): with gr.Accordion("🛠️ How does it work?", open=False): gr.Markdown(""" 1. 📤 Upload your call recording (video) 2. ⚙️ AI processes and analyzes the content 3. 📝 Review the transcript and generated insights 4. 🔎 Use similarity search to explore specific topics 5. 💬 Interact with the AI chatbot for deeper understanding """) with gr.Row(): with gr.Column(scale=1): video_file = gr.Video( label=f"Upload Call Recording (max {MAX_VIDEO_SIZE_MB} MB)", ) process_btn = gr.Button("Analyze Call", variant="primary") status_output = gr.Textbox(label="Status", interactive=False) with gr.Column(scale=2): with gr.Tabs() as tabs: with gr.TabItem("📝 Transcript"): output_transcription = gr.Textbox(label="Call Transcription", lines=15) with gr.TabItem("💡 Insights"): output_insights = gr.Textbox(label="Key Takeaways", lines=10) with gr.TabItem("🔍 Similarity Search"): with gr.Row(): similarity_query = gr.Textbox(label="Search Query", placeholder="Enter a topic or phrase to search for") num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results") similarity_search_btn = gr.Button("Search", variant="secondary") similarity_results = gr.DataFrame( headers=["Relevant Text", "Similarity Score"], label="Search Results" ) with gr.TabItem("🤖 AI Assistant"): chatbot = gr.Chatbot(height=400, label="Chat with AI about the call") with gr.Row(): msg = gr.Textbox(label="Ask a question about the call", placeholder="e.g., What were the main points discussed?", scale=4) send_btn = gr.Button("Send", variant="secondary", scale=1) clear = gr.Button("Clear Chat") process_btn.click( process_video, inputs=[video_file], outputs=[output_transcription, output_insights, status_output], show_progress="full" ) similarity_search_btn.click( similarity_search, inputs=[similarity_query, num_results], outputs=[similarity_results] ) msg.submit(chatbot_response, [msg, chatbot], [msg, chatbot]) send_btn.click(chatbot_response, [msg, chatbot], [msg, chatbot]) clear.click(lambda: None, None, chatbot, queue=False) if __name__ == "__main__": demo.launch()