File size: 3,564 Bytes
0540b53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import os
from typing import Any, Dict, List, Optional

import streamlit as st
from embedchain import App
def timestamp_to_seconds(timestamp):
    """Return the total number of seconds encoded by a clock-style timestamp.

    Two layouts are accepted: 'hh:mm:ss' and 'mm:ss'. Each colon-separated
    field is parsed with ``int``.

    Raises:
        ValueError: if the timestamp does not consist of exactly two or
            three colon-separated fields, or a field is not an integer.
    """
    fields = timestamp.split(':')
    if len(fields) not in (2, 3):
        raise ValueError(f"Invalid timestamp format: {timestamp}")

    # Fold the fields left to right in base 60: (h * 60 + m) * 60 + s.
    total = 0
    for field in fields:
        total = total * 60 + int(field)
    return total
class AIAssistant:
    """Thin wrapper around an embedchain ``App`` for transcript search.

    The app is configured with a Hugging Face LLM and embedder; the
    Hugging Face token is read from the ``HF_TOKEN`` environment variable
    or, failing that, from Streamlit secrets.
    """

    def __init__(self):
        """Build the underlying embedchain app from the built-in config."""
        self.app = self._create_app()

    def _get_api_key(self, name: str) -> str:
        """Return the secret called *name*.

        The process environment is consulted first, then ``st.secrets``.

        Raises:
            ValueError: if neither source yields a non-empty value.
        """
        api_key = os.environ.get(name) or st.secrets.get(name)
        if not api_key:
            raise ValueError(
                f"{name} is not set. Please set it in your environment or Streamlit secrets.")
        return api_key

    def _create_config(self) -> Dict[str, Any]:
        """Return the configuration dict passed to ``App.from_config``."""
        # Resolve the token once; the LLM and the embedder share it.
        hf_token = self._get_api_key('HF_TOKEN')
        prompt = (
            "You are an AI assistant that answers questions based solely on the information provided in your knowledge base.\n"
            "Question: $query\n"
            "Context: $context\n"
            "If the information to answer a question is not available in your knowledge base,\n"
            "respond with 'I don't have enough information to answer that question.'\n"
        )
        return {
            'app': {
                'config': {
                    'name': 'ttv-ec',
                },
            },
            'llm': {
                'provider': 'huggingface',
                'config': {
                    'model': 'mistralai/Mistral-7B-Instruct-v0.2',
                    'top_p': 0.5,
                    'stream': False,
                    'prompt': prompt,
                    'api_key': hf_token,
                },
            },
            'embedder': {
                'provider': 'huggingface',
                'config': {
                    'model': 'sentence-transformers/all-mpnet-base-v2',
                    'api_key': hf_token,
                },
            },
        }

    def _create_app(self) -> "App":
        """Instantiate the embedchain app from ``_create_config``."""
        return App.from_config(config=self._create_config())

    def save(self) -> None:
        """Do nothing; kept as a no-op placeholder."""
        return

    def add_to_knowledge_base(self, data: str, data_type: str,
                              metadata: Optional[Dict[str, Any]] = None) -> None:
        """Add *data* of the given *data_type* to the app, with optional metadata."""
        self.app.add(data, data_type=data_type, metadata=metadata)

    def query(self, question: str, num_results: int = 30,
              filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Search the knowledge base and return the hits plus a formatted summary.

        Args:
            question: Text to search for.
            num_results: Forwarded to ``App.search`` as ``num_documents``.
            filters: Forwarded to ``App.search`` as ``where``.

        Returns:
            A dict with two keys: ``'results'`` holds whatever
            ``App.search`` returned, unchanged; ``'answer'`` holds a
            Markdown-style listing of the hits.
        """
        search_results = self.app.search(
            question, num_documents=num_results, where=filters)
        hits = self._extract_hits(search_results)
        return {
            'results': search_results,
            'answer': self._format_answer(hits),
        }

    @staticmethod
    def _extract_hits(search_results: Any) -> List[Dict[str, Any]]:
        """Return the list of hits from either a bare list or a ``{'results': [...]}`` dict."""
        # NOTE(review): embedchain documents ``search`` as returning a list;
        # the dict form is still accepted because earlier code indexed it
        # that way -- confirm against the installed version.
        if isinstance(search_results, dict):
            return list(search_results.get('results') or [])
        return list(search_results or [])

    @staticmethod
    def _youtube_url(metadata: Dict[str, Any]) -> Optional[str]:
        """Return a youtu.be link for the hit, or None when no video id is present."""
        video_id = metadata.get('youtube_id')
        if not video_id:
            return None
        url = f"https://youtu.be/{video_id}"
        timestamp = metadata.get('timestamp')
        if timestamp is None:
            return url
        try:
            seconds = timestamp_to_seconds(str(timestamp))
        except ValueError:
            # A malformed timestamp should not break the search; link to
            # the video without a start offset instead.
            return url
        return f"{url}?t={seconds}"

    @staticmethod
    def _format_answer(hits: List[Dict[str, Any]]) -> str:
        """Render the hits as a numbered list of transcript excerpts."""
        pieces = ["Here are the most relevant transcript excerpts:\n\n"]
        for i, hit in enumerate(hits, 1):
            metadata = hit.get('metadata') or {}
            speaker_info = (
                f"Speaker: {metadata.get('speaker', 'Unknown')}, "
                f"Company: {metadata.get('company', 'Unknown')}, "
                f"Timestamp: {metadata.get('timestamp', 'Unknown')}"
            )
            yt_url = AIAssistant._youtube_url(metadata)
            if yt_url:
                pieces.append(f"{i}. [Speaker Info: {speaker_info}]({yt_url}) \n")
            else:
                # No video id available, so emit plain text rather than a broken link.
                pieces.append(f"{i}. Speaker Info: {speaker_info} \n")
            pieces.append(f"{metadata.get('title', 'Unknown')} \n")
            pieces.append(f"\"{hit.get('context', '')}\"\n\n")
        return "".join(pieces)
# Usage example
def get_ai_assistant() -> AIAssistant:
    """Construct and return a new ``AIAssistant``."""
    assistant = AIAssistant()
    return assistant
|