TalatMasood committed on
Commit
0739c8b
·
1 Parent(s): 4daad35

Commit chatbot changes

Browse files
src/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/main.cpython-312.pyc and b/src/__pycache__/main.cpython-312.pyc differ
 
src/agents/__pycache__/rag_agent.cpython-312.pyc CHANGED
Binary files a/src/agents/__pycache__/rag_agent.cpython-312.pyc and b/src/agents/__pycache__/rag_agent.cpython-312.pyc differ
 
src/agents/rag_agent.py CHANGED
@@ -1,16 +1,11 @@
1
  # src/agents/rag_agent.py
2
- from dataclasses import dataclass
3
- from typing import List, Optional
4
 
5
  from ..llms.base_llm import BaseLLM
6
  from src.embeddings.base_embedding import BaseEmbedding
7
  from src.vectorstores.base_vectorstore import BaseVectorStore
8
  from src.utils.text_splitter import split_text
9
-
10
- @dataclass
11
- class RAGResponse:
12
- response: str
13
- context_docs: Optional[List[str]] = None
14
 
15
  class RAGAgent:
16
  def __init__(
@@ -23,11 +18,21 @@ class RAGAgent:
23
  self.embedding = embedding
24
  self.vector_store = vector_store
25
 
 
 
 
 
 
 
 
 
 
 
26
  def retrieve_context(
27
  self,
28
  query: str,
29
  top_k: int = 3
30
- ) -> List[str]:
31
  """
32
  Retrieve relevant context documents for a given query
33
 
@@ -36,22 +41,36 @@ class RAGAgent:
36
  top_k (int): Number of top context documents to retrieve
37
 
38
  Returns:
39
- List[str]: List of retrieved context documents
40
  """
41
  # Embed the query
42
  query_embedding = self.embedding.embed_query(query)
43
 
44
- # Retrieve similar documents
45
- context_docs = self.vector_store.similarity_search(
46
  query_embedding,
47
  top_k=top_k
48
  )
49
 
50
- return context_docs
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- def generate_response(
53
  self,
54
- query: str,
 
 
55
  context_docs: Optional[List[str]] = None
56
  ) -> RAGResponse:
57
  """
@@ -59,6 +78,8 @@ class RAGAgent:
59
 
60
  Args:
61
  query (str): User input query
 
 
62
  context_docs (Optional[List[str]]): Optional pre-provided context documents
63
 
64
  Returns:
@@ -66,17 +87,26 @@ class RAGAgent:
66
  """
67
  # If no context provided, retrieve from vector store
68
  if not context_docs:
69
- context_docs = self.retrieve_context(query)
 
 
 
70
 
71
  # Construct augmented prompt with context
72
  augmented_prompt = self._construct_prompt(query, context_docs)
73
 
74
- # Generate response using LLM
75
- response = self.llm.generate(augmented_prompt)
 
 
 
 
76
 
77
  return RAGResponse(
78
- response=response,
79
- context_docs=context_docs
 
 
80
  )
81
 
82
  def _construct_prompt(
 
1
  # src/agents/rag_agent.py
2
+ from typing import List, Optional, Tuple, Dict
 
3
 
4
  from ..llms.base_llm import BaseLLM
5
  from src.embeddings.base_embedding import BaseEmbedding
6
  from src.vectorstores.base_vectorstore import BaseVectorStore
7
  from src.utils.text_splitter import split_text
8
+ from src.models.rag import RAGResponse
 
 
 
 
9
 
10
  class RAGAgent:
11
  def __init__(
 
18
  self.embedding = embedding
19
  self.vector_store = vector_store
20
 
21
+ def _convert_metadata_to_strings(self, metadata: Dict) -> Dict:
22
+ """Convert numeric metadata values to strings"""
23
+ converted = {}
24
+ for key, value in metadata.items():
25
+ if isinstance(value, (int, float)):
26
+ converted[key] = str(value)
27
+ else:
28
+ converted[key] = value
29
+ return converted
30
+
31
  def retrieve_context(
32
  self,
33
  query: str,
34
  top_k: int = 3
35
+ ) -> Tuple[List[str], List[Dict], Optional[List[float]]]:
36
  """
37
  Retrieve relevant context documents for a given query
38
 
 
41
  top_k (int): Number of top context documents to retrieve
42
 
43
  Returns:
44
+ Tuple[List[str], List[Dict], Optional[List[float]]]: Retrieved documents, sources, and scores
45
  """
46
  # Embed the query
47
  query_embedding = self.embedding.embed_query(query)
48
 
49
+ # Retrieve similar documents with metadata and scores
50
+ results = self.vector_store.similarity_search(
51
  query_embedding,
52
  top_k=top_k
53
  )
54
 
55
+ # Extract documents, sources, and scores from results
56
+ documents = [doc['text'] for doc in results]
57
+
58
+ # Convert numeric metadata values to strings
59
+ sources = [self._convert_metadata_to_strings(doc['metadata']) for doc in results]
60
+
61
+ scores = [doc['score'] for doc in results if doc.get('score') is not None]
62
+
63
+ # Only return scores if we have them for all documents
64
+ if len(scores) != len(documents):
65
+ scores = None
66
+
67
+ return documents, sources, scores
68
 
69
+ async def generate_response(
70
  self,
71
+ query: str,
72
+ temperature: float = 0.7,
73
+ max_tokens: Optional[int] = None,
74
  context_docs: Optional[List[str]] = None
75
  ) -> RAGResponse:
76
  """
 
78
 
79
  Args:
80
  query (str): User input query
81
+ temperature (float): Sampling temperature for the LLM
82
+ max_tokens (Optional[int]): Maximum tokens to generate
83
  context_docs (Optional[List[str]]): Optional pre-provided context documents
84
 
85
  Returns:
 
87
  """
88
  # If no context provided, retrieve from vector store
89
  if not context_docs:
90
+ context_docs, sources, scores = self.retrieve_context(query)
91
+ else:
92
+ sources = None
93
+ scores = None
94
 
95
  # Construct augmented prompt with context
96
  augmented_prompt = self._construct_prompt(query, context_docs)
97
 
98
+ # Generate response using LLM with temperature
99
+ response = self.llm.generate(
100
+ augmented_prompt,
101
+ temperature=temperature,
102
+ max_tokens=max_tokens
103
+ )
104
 
105
  return RAGResponse(
106
+ response=response,
107
+ context_docs=context_docs,
108
+ sources=sources,
109
+ scores=scores
110
  )
111
 
112
  def _construct_prompt(
src/db/__pycache__/mongodb_store.cpython-312.pyc CHANGED
Binary files a/src/db/__pycache__/mongodb_store.cpython-312.pyc and b/src/db/__pycache__/mongodb_store.cpython-312.pyc differ
 
src/implementations/__pycache__/document_service.cpython-312.pyc CHANGED
Binary files a/src/implementations/__pycache__/document_service.cpython-312.pyc and b/src/implementations/__pycache__/document_service.cpython-312.pyc differ
 
src/main.py CHANGED
@@ -289,6 +289,37 @@ async def submit_feedback(
289
  logger.error(f"Error submitting feedback: {str(e)}")
290
  raise HTTPException(status_code=500, detail=str(e))
291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  @app.get("/health")
293
  async def health_check():
294
  """Health check endpoint"""
 
289
  logger.error(f"Error submitting feedback: {str(e)}")
290
  raise HTTPException(status_code=500, detail=str(e))
291
 
292
+ @app.get("/debug/config")
293
+ async def debug_config():
294
+ """Debug endpoint to check configuration"""
295
+ import os
296
+ from config.config import settings
297
+ from pathlib import Path
298
+
299
+ debug_info = {
300
+ "environment_variables": {
301
+ "OPENAI_API_KEY": "[SET]" if os.getenv('OPENAI_API_KEY') else "[NOT SET]",
302
+ "OPENAI_MODEL": os.getenv('OPENAI_MODEL', '[NOT SET]')
303
+ },
304
+ "settings": {
305
+ "OPENAI_API_KEY": "[SET]" if settings.OPENAI_API_KEY else "[NOT SET]",
306
+ "OPENAI_MODEL": settings.OPENAI_MODEL,
307
+ },
308
+ "files": {
309
+ "env_file_exists": Path('.env').exists(),
310
+ "openai_config_exists": (Path.home() / '.openai' / 'api_key').exists()
311
+ }
312
+ }
313
+
314
+ if settings.OPENAI_API_KEY:
315
+ key = settings.OPENAI_API_KEY
316
+ debug_info["api_key_info"] = {
317
+ "length": len(key),
318
+ "preview": f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "[INVALID LENGTH]"
319
+ }
320
+
321
+ return debug_info
322
+
323
  @app.get("/health")
324
  async def health_check():
325
  """Health check endpoint"""
src/models/__pycache__/document.cpython-312.pyc CHANGED
Binary files a/src/models/__pycache__/document.cpython-312.pyc and b/src/models/__pycache__/document.cpython-312.pyc differ
 
src/models/__pycache__/rag.cpython-312.pyc ADDED
Binary file (833 Bytes). View file
 
src/models/rag.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/models/rag.py
2
+ from dataclasses import dataclass
3
+ from typing import List, Optional, Dict
4
+
5
+ @dataclass
6
+ class RAGResponse:
7
+ """Response model for RAG-based generation"""
8
+ response: str
9
+ context_docs: Optional[List[str]] = None
10
+ sources: Optional[List[Dict]] = None
11
+ scores: Optional[List[float]] = None
src/utils/__pycache__/logger.cpython-312.pyc CHANGED
Binary files a/src/utils/__pycache__/logger.cpython-312.pyc and b/src/utils/__pycache__/logger.cpython-312.pyc differ
 
src/vectorstores/__pycache__/base_vectorstore.cpython-312.pyc CHANGED
Binary files a/src/vectorstores/__pycache__/base_vectorstore.cpython-312.pyc and b/src/vectorstores/__pycache__/base_vectorstore.cpython-312.pyc differ
 
src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc CHANGED
Binary files a/src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc and b/src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc differ
 
src/vectorstores/chroma_vectorstore.py CHANGED
@@ -91,7 +91,7 @@ class ChromaVectorStore(BaseVectorStore):
91
  query_embedding: List[float],
92
  top_k: int = 3,
93
  **kwargs
94
- ) -> List[str]:
95
  """
96
  Perform similarity search
97
 
@@ -101,20 +101,34 @@ class ChromaVectorStore(BaseVectorStore):
101
  **kwargs: Additional search parameters
102
 
103
  Returns:
104
- List[str]: List of most similar documents
105
  """
106
  try:
107
  results = self.collection.query(
108
  query_embeddings=[query_embedding],
109
  n_results=top_k,
110
- **kwargs
111
  )
112
 
113
  # Handle the case where no results are found
114
- if not results or 'documents' not in results:
115
  return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
- return results.get('documents', [[]])[0]
118
  except Exception as e:
119
  logging.error(f"Error performing similarity search in ChromaDB: {str(e)}")
120
  raise
@@ -125,6 +139,12 @@ class ChromaVectorStore(BaseVectorStore):
125
  ) -> List[Dict[str, Any]]:
126
  """
127
  Retrieve all documents from the vector store
 
 
 
 
 
 
128
  """
129
  try:
130
  include = ["documents", "metadatas"]
@@ -163,7 +183,15 @@ class ChromaVectorStore(BaseVectorStore):
163
  raise
164
 
165
  def get_document_chunks(self, document_id: str) -> List[Dict[str, Any]]:
166
- """Retrieve all chunks for a specific document"""
 
 
 
 
 
 
 
 
167
  try:
168
  results = self.collection.get(
169
  where={"document_id": document_id},
@@ -190,7 +218,12 @@ class ChromaVectorStore(BaseVectorStore):
190
  raise
191
 
192
  def delete_document(self, document_id: str) -> None:
193
- """Delete all chunks associated with a document_id"""
 
 
 
 
 
194
  try:
195
  # Get all chunks with the given document_id
196
  results = self.collection.get(
 
91
  query_embedding: List[float],
92
  top_k: int = 3,
93
  **kwargs
94
+ ) -> List[Dict[str, Any]]:
95
  """
96
  Perform similarity search
97
 
 
101
  **kwargs: Additional search parameters
102
 
103
  Returns:
104
+ List[Dict[str, Any]]: List of documents with their text, metadata, and scores
105
  """
106
  try:
107
  results = self.collection.query(
108
  query_embeddings=[query_embedding],
109
  n_results=top_k,
110
+ include=['documents', 'metadatas', 'distances']
111
  )
112
 
113
  # Handle the case where no results are found
114
+ if not results or 'documents' not in results or not results['documents']:
115
  return []
116
+
117
+ # Format results to include text, metadata, and scores
118
+ formatted_results = []
119
+ documents = results['documents'][0] # First query's results
120
+ metadatas = results['metadatas'][0] if results.get('metadatas') else [None] * len(documents)
121
+ distances = results['distances'][0] if results.get('distances') else [None] * len(documents)
122
+
123
+ for doc, meta, dist in zip(documents, metadatas, distances):
124
+ formatted_results.append({
125
+ 'text': doc,
126
+ 'metadata': meta or {},
127
+ 'score': 1.0 - (dist or 0.0) if dist is not None else None # Convert distance to similarity score
128
+ })
129
+
130
+ return formatted_results
131
 
 
132
  except Exception as e:
133
  logging.error(f"Error performing similarity search in ChromaDB: {str(e)}")
134
  raise
 
139
  ) -> List[Dict[str, Any]]:
140
  """
141
  Retrieve all documents from the vector store
142
+
143
+ Args:
144
+ include_embeddings (bool): Whether to include embeddings in the response
145
+
146
+ Returns:
147
+ List[Dict[str, Any]]: List of documents with their IDs and optionally embeddings
148
  """
149
  try:
150
  include = ["documents", "metadatas"]
 
183
  raise
184
 
185
  def get_document_chunks(self, document_id: str) -> List[Dict[str, Any]]:
186
+ """
187
+ Retrieve all chunks for a specific document
188
+
189
+ Args:
190
+ document_id (str): ID of the document to retrieve chunks for
191
+
192
+ Returns:
193
+ List[Dict[str, Any]]: List of document chunks with their metadata
194
+ """
195
  try:
196
  results = self.collection.get(
197
  where={"document_id": document_id},
 
218
  raise
219
 
220
  def delete_document(self, document_id: str) -> None:
221
+ """
222
+ Delete all chunks associated with a document_id
223
+
224
+ Args:
225
+ document_id (str): ID of the document to delete
226
+ """
227
  try:
228
  # Get all chunks with the given document_id
229
  results = self.collection.get(