Spaces:
Running
Running
File size: 4,778 Bytes
507c938 dbd33b2 507c938 4096d75 507c938 61c90c9 507c938 dbd33b2 185fa42 dbd33b2 61c90c9 507c938 dbd33b2 507c938 dbd33b2 185fa42 507c938 dbd33b2 507c938 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import os
from dotenv import load_dotenv
import ollama
import logging
import time
import sys
# Load .env before anything reads os.getenv (RAGSystem reads OLLAMA_* below).
load_dotenv()
# Configure logging for stdout only
# (container-friendly: records go to the stdout stream, not a file).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stdout
)
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)
# Define the RAG prompt template
# Placeholders: {context} (newline-joined retrieved doc contents) and
# {question} (the raw user query). Filled in by RAGSystem.get_prompt via
# str.format; .strip() removes the leading/trailing newlines of the literal.
RAG_PROMPT_TEMPLATE = """
You are an AI assistant analyzing YouTube video transcripts. Your task is to answer questions based on the provided transcript context.
Context from transcript:
{context}
User Question: {question}
Please provide a clear, concise answer based only on the information given in the context. If the context doesn't contain enough information to fully answer the question, acknowledge this in your response.
Guidelines:
1. Use only information from the provided context
2. Be specific and direct in your answer
3. If context is insufficient, say so
4. Maintain accuracy and avoid speculation
5. Use natural, conversational language
""".strip()
class RAGSystem:
    """Retrieval-augmented generation over YouTube transcript indexes.

    Retrieval is delegated to ``data_processor.search``; generation goes
    through an Ollama-hosted chat model via :meth:`generate`, which retries
    with exponential backoff.
    """

    def __init__(self, data_processor):
        """Read configuration from the environment and probe Ollama.

        Args:
            data_processor: Object exposing ``search(query, num_results=...,
                method=..., index_name=...)`` returning dicts with a
                ``'content'`` key.
        """
        self.data_processor = data_processor
        self.model = os.getenv('OLLAMA_MODEL', 'phi3.5')
        self.ollama_host = os.getenv('OLLAMA_HOST', 'http://ollama:11434')
        # String defaults so both branches of getenv() feed int() the same type.
        # NOTE(review): timeout is stored but never passed to the ollama
        # client -- confirm whether it should configure request timeouts.
        self.timeout = int(os.getenv('OLLAMA_TIMEOUT', '240'))
        self.max_retries = int(os.getenv('OLLAMA_MAX_RETRIES', '3'))
        self.check_ollama_service()

    def check_ollama_service(self):
        """Verify the Ollama service is reachable; pull the model if so.

        Failures are logged rather than raised so construction never fails hard.
        """
        try:
            ollama.list()
        except Exception:
            logger.exception("An error occurred while connecting to Ollama")
            logger.error(
                "Please ensure Ollama is running and accessible at %s",
                self.ollama_host,
            )
        else:
            logger.info("Ollama service is accessible.")
            self.pull_model()

    def pull_model(self):
        """Pull ``self.model`` into Ollama; log (don't raise) on failure."""
        try:
            ollama.pull(self.model)
            logger.info("Successfully pulled model %s.", self.model)
        except Exception:
            logger.exception("Error pulling model %s", self.model)

    def generate(self, prompt):
        """Send *prompt* to the chat model, retrying with exponential backoff.

        Returns:
            str | None: The model's reply text, or ``None`` once all
            ``self.max_retries`` attempts have failed.
        """
        for attempt in range(self.max_retries):
            try:
                response = ollama.chat(
                    model=self.model,
                    messages=[{"role": "user", "content": prompt}],
                )
                content = response['message']['content']
                # Fix: was a bare print() of the model output; route debug
                # output through the module logger instead.
                logger.debug("Ollama response: %s", content)
                return content
            except Exception:
                logger.exception(
                    "Error generating response on attempt %d", attempt + 1
                )
                if attempt == self.max_retries - 1:
                    logger.error(
                        "All retries exhausted. Unable to generate response."
                    )
                    return None
                time.sleep(2 ** attempt)  # Exponential backoff

    def get_prompt(self, user_query, relevant_docs):
        """Render the RAG prompt from the user query and retrieved docs."""
        context = "\n".join(doc['content'] for doc in relevant_docs)
        return RAG_PROMPT_TEMPLATE.format(
            context=context,
            question=user_query,
        )

    def query(self, user_query, search_method='hybrid', index_name=None):
        """Answer *user_query* from the transcript index *index_name*.

        Args:
            user_query: The question to answer.
            search_method: Retrieval strategy passed to the data processor.
            index_name: Required; which processed video index to search.

        Returns:
            tuple[str, str]: ``(answer, prompt)``. ``prompt`` is ``""`` when
            retrieval found nothing or an error occurred before generation.
        """
        try:
            if not index_name:
                raise ValueError("No index name provided. Please select a video and ensure it has been processed.")
            relevant_docs = self.data_processor.search(
                user_query,
                num_results=3,
                method=search_method,
                index_name=index_name,
            )
            if not relevant_docs:
                logger.warning("No relevant documents found for the query.")
                return "I couldn't find any relevant information to answer your query.", ""
            prompt = self.get_prompt(user_query, relevant_docs)
            # Fix: previously duplicated the ollama.chat call inline, which
            # bypassed the retry/backoff logic generate() exists to provide.
            answer = self.generate(prompt)
            if answer is None:
                return "An error occurred: unable to generate a response.", prompt
            return answer, prompt
        except Exception as e:
            logger.exception("An error occurred in the RAG system")
            return f"An error occurred: {str(e)}", ""

    def _rewrite(self, query, instruction):
        """Shared body of the two query-rewriting strategies below.

        Builds the rewrite prompt from *instruction*, asks the model, and
        falls back to the original *query* when generation fails.
        """
        prompt = f"""{instruction}
Query: {query}
Rewritten query:"""
        response = self.generate(prompt)
        if response:
            return response, prompt
        return query, prompt  # Return original query if rewriting fails

    def rewrite_cot(self, query):
        """Rewrite *query* using chain-of-thought reasoning.

        Returns:
            tuple[str, str]: ``(rewritten_or_original_query, prompt_used)``.
        """
        return self._rewrite(
            query,
            "Rewrite the following query using chain-of-thought reasoning:",
        )

    def rewrite_react(self, query):
        """Rewrite *query* using the ReAct (Reasoning and Acting) approach.

        Returns:
            tuple[str, str]: ``(rewritten_or_original_query, prompt_used)``.
        """
        return self._rewrite(
            query,
            "Rewrite the following query using ReAct (Reasoning and Acting) approach:",
        )