|
import os |
|
import streamlit as st |
|
from embedchain import App |
|
from typing import Dict, Any, List |
|
|
|
|
|
def timestamp_to_seconds(timestamp):
    """Return the total number of seconds encoded by *timestamp*.

    Accepts 'hh:mm:ss' or 'mm:ss'. Any other number of colon-separated
    fields raises ``ValueError``; so does a field that ``int`` cannot parse.
    """
    fields = timestamp.split(':')
    if len(fields) not in (2, 3):
        raise ValueError(f"Invalid timestamp format: {timestamp}")

    # Fold left to right in base 60: ((h * 60) + m) * 60 + s.
    total = 0
    for field in fields:
        total = total * 60 + int(field)
    return total
|
|
|
|
|
class AIAssistant:
    """Wrapper around an embedchain ``App`` used to store and search transcripts.

    The app is built once in ``__init__`` from the dict returned by
    ``_create_config``, which selects Hugging Face as the provider for both
    the LLM and the embedder.
    """

    # Prompt template passed to the LLM config. ``$query`` and ``$context``
    # are placeholders; presumably embedchain substitutes them at query time.
    _PROMPT_TEMPLATE = (
        "You are an AI assistant that answers questions based solely on the "
        "information provided in your knowledge base.\n"
        "\n"
        "Question: $query\n"
        "Context: $context\n"
        "\n"
        "If the information to answer a question is not available in your "
        "knowledge base,\n"
        "respond with 'I don't have enough information to answer that "
        "question.'\n"
    )

    def __init__(self):
        """Create the underlying embedchain app from the built-in config."""
        self.app = self._create_app()

    def _get_api_key(self, name: str) -> str:
        """Look up the secret *name* in the environment, then Streamlit secrets.

        Args:
            name: Name of the environment variable / secret entry.

        Returns:
            The first non-empty value found.

        Raises:
            ValueError: If neither source provides a non-empty value.
        """
        api_key = os.environ.get(name)
        if not api_key:
            # Fall back to Streamlit's secrets store only when the
            # environment does not supply the value.
            api_key = st.secrets.get(name)
        if not api_key:
            raise ValueError(
                f"{name} is not set. Please set it in your environment or Streamlit secrets.")
        return api_key

    def _create_config(self) -> Dict[str, Any]:
        """Return the configuration dict passed to ``App.from_config``.

        Raises:
            ValueError: If ``HF_TOKEN`` cannot be resolved by ``_get_api_key``.
        """
        # Resolve the token once; the LLM and the embedder share it.
        hf_token = self._get_api_key('HF_TOKEN')
        return {
            'app': {
                'config': {
                    'name': 'ttv-ec',
                },
            },
            'llm': {
                'provider': 'huggingface',
                'config': {
                    'model': 'mistralai/Mistral-7B-Instruct-v0.2',
                    'top_p': 0.5,
                    'stream': False,
                    'prompt': self._PROMPT_TEMPLATE,
                    'api_key': hf_token,
                },
            },
            'embedder': {
                'provider': 'huggingface',
                'config': {
                    'model': 'sentence-transformers/all-mpnet-base-v2',
                    'api_key': hf_token,
                },
            },
        }

    def _create_app(self) -> "App":
        """Build the embedchain ``App`` from ``_create_config``."""
        config = self._create_config()
        return App.from_config(config=config)

    def save(self) -> None:
        """Do nothing; currently a no-op that returns ``None``."""
        return

    def add_to_knowledge_base(self, data: str, data_type: str, metadata: Dict[str, Any] = None) -> None:
        """Forward *data* to ``self.app.add`` with its type and metadata.

        Args:
            data: The content (or reference to content) to add.
            data_type: Passed through as ``data_type``.
            metadata: Optional metadata dict stored with the data.
        """
        self.app.add(data, data_type=data_type, metadata=metadata)

    @staticmethod
    def _format_excerpt(index: int, result: Dict[str, Any]) -> str:
        """Render one search hit as a numbered text entry.

        The speaker line becomes a YouTube link only when the hit's metadata
        carries both ``timestamp`` and ``youtube_id``; otherwise it is plain
        text, consistent with the 'Unknown' fallbacks used for other fields.

        Raises:
            ValueError: If a present timestamp is rejected by
                ``timestamp_to_seconds``.
        """
        metadata = result.get('metadata') or {}
        speaker_info = (
            f"Speaker: {metadata.get('speaker', 'Unknown')}, "
            f"Company: {metadata.get('company', 'Unknown')}, "
            f"Timestamp: {metadata.get('timestamp', 'Unknown')}"
        )

        timestamp = metadata.get('timestamp')
        youtube_id = metadata.get('youtube_id')
        if timestamp and youtube_id:
            seconds = timestamp_to_seconds(timestamp)
            yt_url = f"https://youtu.be/{youtube_id}?t={seconds}"
            heading = f"{index}. [Speaker Info: {speaker_info}]({yt_url}) \n"
        else:
            heading = f"{index}. Speaker Info: {speaker_info} \n"

        return "".join((
            heading,
            f"{metadata.get('title', 'Unknown')} \n",
            f"\"{result.get('context', '')}\"\n\n",
        ))

    def query(self, question: str, num_results: int = 30, filters: Dict[str, Any] = None) -> Dict[str, Any]:
        """Search the knowledge base and return the hits plus a text summary.

        Args:
            question: Text to search for.
            num_results: Passed to ``self.app.search`` as ``num_documents``.
            filters: Passed to ``self.app.search`` as ``where``.

        Returns:
            A dict with:
              * ``'results'`` - the value returned by ``self.app.search``,
                unchanged;
              * ``'answer'`` - a formatted listing of the hits.

        Raises:
            ValueError: If a hit carries a timestamp that
                ``timestamp_to_seconds`` rejects.
        """
        search_results = self.app.search(
            question, num_documents=num_results, where=filters)

        # The search result is returned under 'results' as a list of hits, so
        # it is iterated directly. A {'results': [...]} wrapper is tolerated
        # as well, since the earlier code indexed it that way.
        if isinstance(search_results, dict):
            hits = search_results.get('results') or []
        else:
            hits = search_results or []

        parts = ["Here are the most relevant transcript excerpts:\n\n"]
        parts.extend(
            self._format_excerpt(i, hit) for i, hit in enumerate(hits, 1)
        )

        return {'results': search_results, 'answer': "".join(parts)}
|
|
|
|
|
|
|
|
|
def get_ai_assistant() -> AIAssistant:
    """Construct and return a new ``AIAssistant`` instance."""
    assistant = AIAssistant()
    return assistant
|
|