Spaces:

eusholli
/

ttv-ec

Build error

App Files Files Community

ttv-ec / ai_config_ec.py

eusholli

faiss filter initial commit

0540b53 7 months ago

raw

history blame

3.56 kB

	import os
	import streamlit as st
	from embedchain import App
	from typing import Dict, Any, List


	def timestamp_to_seconds(timestamp):
	    """Convert a timestamp in the format 'hh:mm:ss' or 'mm:ss' to total seconds.

	    Args:
	        timestamp: Colon-separated string made of two (minutes, seconds) or
	            three (hours, minutes, seconds) integer fields.

	    Returns:
	        The total number of seconds as an int.

	    Raises:
	        ValueError: If the string does not have exactly two or three fields,
	            or if any field is not an integer.
	    """
	    fields = timestamp.split(':')
	    if len(fields) not in (2, 3):
	        raise ValueError(f"Invalid timestamp format: {timestamp}")

	    try:
	        values = [int(field) for field in fields]
	    except ValueError as err:
	        # Report non-numeric fields with the same message as a wrong field
	        # count, keeping the underlying int() failure as the cause.
	        raise ValueError(f"Invalid timestamp format: {timestamp}") from err

	    # Fold left to right in base 60: ((h * 60) + m) * 60 + s.
	    total = 0
	    for value in values:
	        total = total * 60 + value
	    return total


	class AIAssistant:
	    """Thin wrapper around an embedchain ``App`` built from a fixed config.

	    The app is created once, at construction time, from the dictionary
	    returned by ``_create_config`` and stored on ``self.app``.
	    """

	    def __init__(self):
	        self.app = self._create_app()

	    def _get_api_key(self, name: str) -> str:
	        """Return the secret called *name*.

	        The environment is consulted first; Streamlit secrets are the
	        fallback.

	        Args:
	            name: Name of the environment variable / Streamlit secret.

	        Returns:
	            The non-empty secret value.

	        Raises:
	            ValueError: If the secret is missing or empty in both places.
	        """
	        api_key = os.environ.get(name)
	        if not api_key:
	            try:
	                api_key = st.secrets.get(name)
	            except FileNotFoundError:
	                # st.secrets raises (a subclass of) FileNotFoundError when
	                # no secrets file exists; treat that as "not set" so the
	                # caller gets the explanatory ValueError below.
	                api_key = None
	        if not api_key:
	            raise ValueError(
	                f"{name} is not set. Please set it in your environment or Streamlit secrets.")
	        return api_key

	    def _create_config(self) -> Dict[str, Any]:
	        """Build the embedchain configuration dictionary.

	        Both the LLM and the embedder use the Hugging Face provider and share
	        the ``HF_TOKEN`` secret.

	        Returns:
	            A dict with ``app``, ``llm`` and ``embedder`` sections.

	        Raises:
	            ValueError: If ``HF_TOKEN`` cannot be resolved.
	        """
	        # Resolve the token once; it is used by both providers.
	        hf_token = self._get_api_key('HF_TOKEN')

	        # Built from explicit literals so the prompt text does not depend on
	        # how this source file happens to be indented.
	        prompt = (
	            "You are an AI assistant that answers questions based solely on "
	            "the information provided in your knowledge base.\n"
	            "\n"
	            "Question: $query\n"
	            "Context: $context\n"
	            "\n"
	            "If the information to answer a question is not available in your knowledge base,\n"
	            "respond with 'I don't have enough information to answer that question.'\n"
	        )

	        return {
	            'app': {
	                'config': {
	                    'name': 'ttv-ec',
	                },
	            },
	            'llm': {
	                'provider': 'huggingface',
	                'config': {
	                    'model': 'mistralai/Mistral-7B-Instruct-v0.2',
	                    'top_p': 0.5,
	                    'stream': False,
	                    'prompt': prompt,
	                    'api_key': hf_token,
	                },
	            },
	            'embedder': {
	                'provider': 'huggingface',
	                'config': {
	                    'model': 'sentence-transformers/all-mpnet-base-v2',
	                    'api_key': hf_token,
	                },
	            },
	        }

	    def _create_app(self) -> "App":
	        """Instantiate the embedchain app from ``_create_config``."""
	        config = self._create_config()
	        return App.from_config(config=config)

	    def save(self) -> None:
	        """Do nothing; this method is an intentional no-op."""
	        return

	    def add_to_knowledge_base(self, data: str, data_type: str, metadata: Dict[str, Any] = None) -> None:
	        """Add *data* to the app's knowledge base.

	        Args:
	            data: The content (or reference to it) forwarded to ``App.add``.
	            data_type: Forwarded as ``data_type``.
	            metadata: Optional metadata forwarded unchanged.
	        """
	        self.app.add(data, data_type=data_type, metadata=metadata)

	    def query(self, question: str, num_results: int = 30, filters: Dict[str, Any] = None) -> Dict[str, List[Dict[str, Any]]]:
	        """Search the knowledge base and return the matches.

	        Args:
	            question: Text to search for.
	            num_results: Forwarded to ``App.search`` as ``num_documents``.
	            filters: Optional metadata filter forwarded as ``where``.

	        Returns:
	            ``{'results': <value returned by App.search>}``. The search
	            output is passed through untouched, so entries with incomplete
	            metadata never make the query fail.
	        """
	        search_results = self.app.search(
	            question, num_documents=num_results, where=filters)
	        return {'results': search_results}

	# Factory helper: returns a newly constructed assistant


	def get_ai_assistant() -> AIAssistant:
	    """Create and return a new ``AIAssistant`` instance."""
	    assistant = AIAssistant()
	    return assistant