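# Page header captured together with this file (not program code):
#   Spaces: eusholli / ttv-ec
#   Status: Build error
#   File size: 3,564 Bytes
#   Revision: 0540b53 (presumably a commit hash)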

import os
import streamlit as st
from embedchain import App
from typing import Dict, Any, List


def timestamp_to_seconds(timestamp):
    """Return the total number of seconds encoded by *timestamp*.

    Accepted layouts are 'hh:mm:ss' and 'mm:ss'; every colon-separated
    field is parsed with ``int``.

    Raises:
        ValueError: If the string does not have exactly two or three
            colon-separated fields, or a field is not an integer.
    """
    fields = timestamp.split(':')
    if len(fields) not in (2, 3):
        raise ValueError(f"Invalid timestamp format: {timestamp}")

    # Fold left to right in base 60: ((h * 60) + m) * 60 + s.
    total = 0
    for field in fields:
        total = total * 60 + int(field)
    return total


class AIAssistant:
    """Wrapper around an embedchain ``App`` used as a transcript knowledge base.

    The app is built from the dictionary produced by ``_create_config``, which
    selects the 'huggingface' provider for both the LLM and the embedder and
    authenticates with the ``HF_TOKEN`` secret.
    """

    def __init__(self):
        """Create the underlying embedchain app."""
        self.app = self._create_app()

    def _get_api_key(self, name: str) -> str:
        """Return the secret called *name*.

        The process environment is consulted first; Streamlit secrets are
        only read when the environment has no non-empty value.

        Raises:
            ValueError: If neither source provides a non-empty value.
        """
        api_key = os.environ.get(name) or st.secrets.get(name)
        if not api_key:
            raise ValueError(
                f"{name} is not set. Please set it in your environment or Streamlit secrets.")
        return api_key

    def _create_config(self) -> Dict[str, Any]:
        """Return the configuration dictionary passed to ``App.from_config``.

        Raises:
            ValueError: If ``HF_TOKEN`` cannot be resolved.
        """
        # Resolve the token once; the LLM and the embedder share it.
        hf_token = self._get_api_key('HF_TOKEN')
        return {
            'app': {
                'config': {
                    'name': 'ttv-ec',
                },
            },
            'llm': {
                'provider': 'huggingface',
                'config': {
                    'model': 'mistralai/Mistral-7B-Instruct-v0.2',
                    'top_p': 0.5,
                    'stream': False,
                    'prompt': """You are an AI assistant that answers questions based solely on the information provided in your knowledge base.

Question: $query
Context: $context

If the information to answer a question is not available in your knowledge base,
respond with 'I don't have enough information to answer that question.'
""",
                    'api_key': hf_token,
                },
            },
            'embedder': {
                'provider': 'huggingface',
                'config': {
                    'model': 'sentence-transformers/all-mpnet-base-v2',
                    'api_key': hf_token,
                },
            },
        }

    def _create_app(self) -> "App":
        """Instantiate the embedchain app from ``_create_config``."""
        return App.from_config(config=self._create_config())

    def save(self) -> None:
        """Do nothing; intentionally a no-op."""
        return

    def add_to_knowledge_base(self, data: str, data_type: str, metadata: Dict[str, Any] = None) -> None:
        """Add *data* to the app, forwarding *data_type* and *metadata* unchanged."""
        self.app.add(data, data_type=data_type, metadata=metadata)

    @staticmethod
    def _youtube_url(metadata: Dict[str, Any]):
        """Return a youtu.be link for *metadata*, or ``None`` without a video id.

        A ``?t=<seconds>`` offset is appended only when the metadata carries a
        timestamp that ``timestamp_to_seconds`` can parse.
        """
        video_id = metadata.get('youtube_id')
        if not video_id:
            return None
        url = f"https://youtu.be/{video_id}"
        timestamp = metadata.get('timestamp')
        if not timestamp:
            return url
        try:
            offset = timestamp_to_seconds(timestamp)
        except ValueError:
            # Unparsable timestamp: still link to the video, just without an offset.
            return url
        return f"{url}?t={offset}"

    def _format_excerpt(self, position: int, result: Dict[str, Any]) -> str:
        """Render one search hit as a numbered Markdown entry."""
        metadata = result.get('metadata') or {}
        speaker_info = (
            f"Speaker: {metadata.get('speaker', 'Unknown')}, "
            f"Company: {metadata.get('company', 'Unknown')}, "
            f"Timestamp: {metadata.get('timestamp', 'Unknown')}"
        )
        url = self._youtube_url(metadata)
        if url:
            heading = f"{position}. [Speaker Info: {speaker_info}]({url})  \n"
        else:
            # No video id available, so emit the heading without a link.
            heading = f"{position}. Speaker Info: {speaker_info}  \n"
        return (
            heading
            + f"{metadata.get('title', 'Unknown')}  \n"
            + f"\"{result['context']}\"\n\n"
        )

    def query(self, question: str, num_results: int = 30, filters: Dict[str, Any] = None) -> Dict[str, Any]:
        """Search the knowledge base and format the hits.

        Args:
            question: Text passed to ``App.search``.
            num_results: Forwarded as ``num_documents``.
            filters: Forwarded as ``where``; ``None`` means no filter.

        Returns:
            A dict with two keys: ``'results'`` holds whatever ``App.search``
            returned, untouched; ``'answer'`` holds a Markdown summary of the
            hits (previously built but discarded).
        """
        search_results = self.app.search(
            question, num_documents=num_results, where=filters)

        # NOTE(review): the original indexed search_results['results'], while
        # embedchain's search appears to return a bare list of hits -- accept
        # both shapes until the installed version is confirmed.
        if isinstance(search_results, dict):
            hits = search_results['results']
        else:
            hits = search_results

        parts = ["Here are the most relevant transcript excerpts:\n\n"]
        parts.extend(
            self._format_excerpt(position, hit)
            for position, hit in enumerate(hits, 1)
        )
        return {'results': search_results, 'answer': ''.join(parts)}

# Factory helper for obtaining an assistant instance


def get_ai_assistant() -> AIAssistant:
    """Construct a new ``AIAssistant`` and hand it back to the caller."""
    assistant = AIAssistant()
    return assistant