TalatMasood committed
Commit
e9d730a
1 Parent(s): 2461d7a

Refactoring code

Files changed (31)
  1. config/__pycache__/config.cpython-312.pyc +0 -0
  2. config/config.py +3 -0
  3. src/__pycache__/main.cpython-312.pyc +0 -0
  4. src/agents/__pycache__/rag_agent.cpython-312.pyc +0 -0
  5. src/db/__pycache__/mongodb_store.cpython-312.pyc +0 -0
  6. src/db/mongodb_store.py +86 -0
  7. src/embeddings/__pycache__/__init__.cpython-312.pyc +0 -0
  8. src/implementations/__init__.py +4 -0
  9. src/implementations/__pycache__/__init__.cpython-312.pyc +0 -0
  10. src/implementations/__pycache__/document_service.cpython-312.pyc +0 -0
  11. src/implementations/document_service.py +147 -0
  12. src/llms/__pycache__/bert_llm.cpython-312.pyc +0 -0
  13. src/llms/__pycache__/falcon_llm.cpython-312.pyc +0 -0
  14. src/llms/__pycache__/llama_llm.cpython-312.pyc +0 -0
  15. src/llms/__pycache__/openai_llm.cpython-312.pyc +0 -0
  16. src/main.py +54 -261
  17. src/models/__init__.py +26 -0
  18. src/models/__pycache__/__init__.cpython-312.pyc +0 -0
  19. src/models/__pycache__/base.cpython-312.pyc +0 -0
  20. src/models/__pycache__/chat.cpython-312.pyc +0 -0
  21. src/models/__pycache__/document.cpython-312.pyc +0 -0
  22. src/models/base.py +9 -0
  23. src/models/chat.py +37 -0
  24. src/models/document.py +22 -0
  25. src/utils/__pycache__/conversation_summarizer.cpython-312.pyc +0 -0
  26. src/utils/__pycache__/document_processor.cpython-312.pyc +0 -0
  27. src/utils/__pycache__/llm_utils.cpython-312.pyc +0 -0
  28. src/utils/llm_utils.py +59 -0
  29. src/vectorstores/__pycache__/__init__.cpython-312.pyc +0 -0
  30. src/vectorstores/__pycache__/base_vectorstore.cpython-312.pyc +0 -0
  31. src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc +0 -0
config/__pycache__/config.cpython-312.pyc CHANGED
Binary files a/config/__pycache__/config.cpython-312.pyc and b/config/__pycache__/config.cpython-312.pyc differ
 
config/config.py CHANGED
@@ -23,6 +23,9 @@ class Settings:
     # Vector Store Configuration
     CHROMA_PATH = os.getenv('CHROMA_PATH', './chroma_db')
 
+    # MongoDB Configuration
+    MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017')
+
     # Application Configuration
     DEBUG = os.getenv('DEBUG', 'False') == 'True'
 
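Since Settings reads environment variables at import time, the new MongoDB URI can be overridden for local testing only before config.config is first imported; a minimal sketch (the URI value is an example):

    import os

    # Must run before `from config.config import settings` anywhere in the process
    os.environ['MONGODB_URI'] = 'mongodb://localhost:27017'  # example value

    from config.config import settings
    print(settings.MONGODB_URI)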
 
src/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/main.cpython-312.pyc and b/src/__pycache__/main.cpython-312.pyc differ
 
src/agents/__pycache__/rag_agent.cpython-312.pyc CHANGED
Binary files a/src/agents/__pycache__/rag_agent.cpython-312.pyc and b/src/agents/__pycache__/rag_agent.cpython-312.pyc differ
 
src/db/__pycache__/mongodb_store.cpython-312.pyc ADDED
Binary file (3.86 kB).
 
src/db/mongodb_store.py ADDED
@@ -0,0 +1,86 @@
+# src/db/mongodb_store.py
+from motor.motor_asyncio import AsyncIOMotorClient
+from datetime import datetime
+import json
+from typing import List, Dict, Optional
+from bson import ObjectId
+
+class MongoDBStore:
+    def __init__(self, mongo_uri: str = "mongodb://localhost:27017"):
+        """Initialize MongoDB connection"""
+        self.client = AsyncIOMotorClient(mongo_uri)
+        self.db = self.client.rag_chatbot
+        self.chat_history = self.db.chat_history
+
+    async def store_message(
+        self,
+        conversation_id: str,
+        query: str,
+        response: str,
+        context: List[str],
+        sources: List[Dict],
+        llm_provider: str
+    ) -> str:
+        """Store chat message in MongoDB"""
+        document = {
+            "conversation_id": conversation_id,
+            "timestamp": datetime.now(),
+            "query": query,
+            "response": response,
+            "context": context,
+            "sources": sources,
+            "llm_provider": llm_provider,
+            "feedback": None,
+            "rating": None
+        }
+
+        result = await self.chat_history.insert_one(document)
+        return str(result.inserted_id)
+
+    async def get_conversation_history(self, conversation_id: str) -> List[Dict]:
+        """Retrieve conversation history"""
+        cursor = self.chat_history.find(
+            {"conversation_id": conversation_id}
+        ).sort("timestamp", 1)
+
+        history = []
+        async for document in cursor:
+            document["_id"] = str(document["_id"])
+            history.append(document)
+
+        return history
+
+    async def update_feedback(
+        self,
+        conversation_id: str,
+        feedback: Optional[str],
+        rating: Optional[int]
+    ) -> bool:
+        """Update feedback for a conversation"""
+        result = await self.chat_history.update_many(
+            {"conversation_id": conversation_id},
+            {
+                "$set": {
+                    "feedback": feedback,
+                    "rating": rating
+                }
+            }
+        )
+        return result.modified_count > 0
+
+    async def get_messages_for_summary(self, conversation_id: str) -> List[Dict]:
+        """Get messages in format suitable for summarization"""
+        cursor = self.chat_history.find(
+            {"conversation_id": conversation_id}
+        ).sort("timestamp", 1)
+
+        messages = []
+        async for doc in cursor:
+            messages.append({
+                'role': 'user' if doc['query'] else 'assistant',
+                'content': doc['query'] or doc['response'],
+                'timestamp': doc['timestamp'],
+                'sources': doc['sources']
+            })
+
+        return messages
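For reference, a minimal usage sketch of the new MongoDBStore (assumes a MongoDB server reachable at the default URI; the conversation id and message contents are made-up examples):

    import asyncio
    from src.db.mongodb_store import MongoDBStore

    async def demo():
        store = MongoDBStore("mongodb://localhost:27017")  # assumes a local MongoDB server

        # Persist one chat turn, then read the conversation back
        message_id = await store.store_message(
            conversation_id="demo-conversation",      # hypothetical id
            query="What is in the uploaded report?",
            response="The report covers Q3 results.",
            context=["retrieved chunk ..."],
            sources=[{"source": "report.pdf"}],
            llm_provider="openai"
        )
        history = await store.get_conversation_history("demo-conversation")
        print(message_id, len(history))

    asyncio.run(demo())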
src/embeddings/__pycache__/__init__.cpython-312.pyc CHANGED
Binary files a/src/embeddings/__pycache__/__init__.cpython-312.pyc and b/src/embeddings/__pycache__/__init__.cpython-312.pyc differ
 
src/implementations/__init__.py ADDED
@@ -0,0 +1,4 @@
+# src/implementations/__init__.py
+from .document_service import DocumentService
+
+__all__ = ['DocumentService']
src/implementations/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (308 Bytes).
 
src/implementations/__pycache__/document_service.cpython-312.pyc ADDED
Binary file (7.21 kB).
 
src/implementations/document_service.py ADDED
@@ -0,0 +1,147 @@
+# src/implementations/document_service.py
+from pathlib import Path
+import shutil
+import os
+import uuid
+from typing import List, Tuple
+from fastapi import UploadFile, BackgroundTasks
+from ..vectorstores.chroma_vectorstore import ChromaVectorStore
+from ..utils.document_processor import DocumentProcessor
+from ..models import DocumentResponse, DocumentInfo, BatchUploadResponse
+from ..utils.logger import logger
+
+class DocumentService:
+    def __init__(self, doc_processor: DocumentProcessor):
+        self.doc_processor = doc_processor
+        self.upload_dir = Path("temp_uploads")
+        self.upload_dir.mkdir(exist_ok=True)
+
+    async def process_documents(
+        self,
+        files: List[UploadFile],
+        vector_store: ChromaVectorStore,
+        background_tasks: BackgroundTasks
+    ) -> BatchUploadResponse:
+        """Process multiple document uploads"""
+        processed_files, failed_files = await self._handle_file_uploads(
+            files,
+            vector_store,
+            background_tasks
+        )
+
+        return BatchUploadResponse(
+            message=f"Processed {len(processed_files)} documents with {len(failed_files)} failures",
+            processed_files=processed_files,
+            failed_files=failed_files
+        )
+
+    async def _handle_file_uploads(
+        self,
+        files: List[UploadFile],
+        vector_store: ChromaVectorStore,
+        background_tasks: BackgroundTasks
+    ) -> Tuple[List[DocumentResponse], List[dict]]:
+        """Handle individual file uploads and processing"""
+        processed_files = []
+        failed_files = []
+
+        for file in files:
+            try:
+                if not self._is_supported_format(file.filename):
+                    failed_files.append(self._create_failed_file_entry(
+                        file.filename,
+                        "Unsupported file format"
+                    ))
+                    continue
+
+                document_response = await self._process_single_file(
+                    file,
+                    vector_store,
+                    background_tasks
+                )
+                processed_files.append(document_response)
+
+            except Exception as e:
+                logger.error(f"Error processing file {file.filename}: {str(e)}")
+                failed_files.append(self._create_failed_file_entry(
+                    file.filename,
+                    str(e)
+                ))
+
+        return processed_files, failed_files
+
+    def _is_supported_format(self, filename: str) -> bool:
+        """Check if file format is supported"""
+        return any(filename.lower().endswith(ext)
+                   for ext in self.doc_processor.supported_formats)
+
+    async def _process_single_file(
+        self,
+        file: UploadFile,
+        vector_store: ChromaVectorStore,
+        background_tasks: BackgroundTasks
+    ) -> DocumentResponse:
+        """Process a single file upload"""
+        document_id = str(uuid.uuid4())
+        temp_path = self.upload_dir / f"{document_id}_{file.filename}"
+
+        # Save file
+        with open(temp_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+
+        # Add background task for processing
+        background_tasks.add_task(
+            self._process_and_store_document,
+            temp_path,
+            vector_store,
+            document_id
+        )
+
+        return DocumentResponse(
+            message="Document queued for processing",
+            document_id=document_id,
+            status="processing",
+            document_info=DocumentInfo(
+                original_filename=file.filename,
+                size=os.path.getsize(temp_path),
+                content_type=file.content_type
+            )
+        )
+
+    async def _process_and_store_document(
+        self,
+        file_path: Path,
+        vector_store: ChromaVectorStore,
+        document_id: str
+    ):
+        """Process document and store in vector database"""
+        try:
+            processed_doc = await self.doc_processor.process_document(file_path)
+
+            vector_store.add_documents(
+                documents=processed_doc['chunks'],
+                metadatas=[{
+                    'document_id': document_id,
+                    'chunk_id': i,
+                    'source': str(file_path.name),
+                    'metadata': processed_doc['metadata']
+                } for i in range(len(processed_doc['chunks']))],
+                ids=[f"{document_id}_chunk_{i}" for i in range(len(processed_doc['chunks']))]
+            )
+
+            return processed_doc
+        finally:
+            if file_path.exists():
+                file_path.unlink()
+
+    def _create_failed_file_entry(self, filename: str, error: str) -> dict:
+        """Create a failed file entry"""
+        return {
+            "filename": filename,
+            "error": error
+        }
+
+    def cleanup(self):
+        """Clean up upload directory"""
+        if self.upload_dir.exists() and not any(self.upload_dir.iterdir()):
+            self.upload_dir.rmdir()
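A client-side sketch of the new upload flow (assumes the API from src/main.py is running on localhost:8000 and that .txt is among doc_processor.supported_formats):

    import httpx

    # One in-memory sample file; the endpoint accepts multiple "files" parts
    files = [("files", ("notes.txt", b"hello world", "text/plain"))]

    resp = httpx.post("http://localhost:8000/documents/upload", files=files, timeout=60)
    # BatchUploadResponse: files are only queued here; chunking and vector-store
    # insertion happen later in a background task
    print(resp.json())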
src/llms/__pycache__/bert_llm.cpython-312.pyc CHANGED
Binary files a/src/llms/__pycache__/bert_llm.cpython-312.pyc and b/src/llms/__pycache__/bert_llm.cpython-312.pyc differ
 
src/llms/__pycache__/falcon_llm.cpython-312.pyc CHANGED
Binary files a/src/llms/__pycache__/falcon_llm.cpython-312.pyc and b/src/llms/__pycache__/falcon_llm.cpython-312.pyc differ
 
src/llms/__pycache__/llama_llm.cpython-312.pyc CHANGED
Binary files a/src/llms/__pycache__/llama_llm.cpython-312.pyc and b/src/llms/__pycache__/llama_llm.cpython-312.pyc differ
 
src/llms/__pycache__/openai_llm.cpython-312.pyc CHANGED
Binary files a/src/llms/__pycache__/openai_llm.cpython-312.pyc and b/src/llms/__pycache__/openai_llm.cpython-312.pyc differ
 
src/main.py CHANGED
@@ -1,29 +1,26 @@
 # src/main.py
-from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, BackgroundTasks
-from fastapi.responses import StreamingResponse, JSONResponse
-from pydantic import BaseModel
-from typing import List, Optional, AsyncGenerator, Dict
-import asyncio
-import json
+from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
+from fastapi.responses import StreamingResponse
+from typing import List
 import uuid
 from datetime import datetime
-import aiosqlite
-from pathlib import Path
-import shutil
-import os
 
 # Import custom modules
-from .agents.rag_agent import RAGAgent
-from .llms.openai_llm import OpenAILanguageModel
-from .llms.ollama_llm import OllamaLanguageModel
-from .llms.bert_llm import BERTLanguageModel
-from .llms.falcon_llm import FalconLanguageModel
-from .llms.llama_llm import LlamaLanguageModel
-from .embeddings.huggingface_embedding import HuggingFaceEmbedding
-from .vectorstores.chroma_vectorstore import ChromaVectorStore
-from .utils.document_processor import DocumentProcessor
-from .utils.conversation_summarizer import ConversationSummarizer
-from .utils.logger import logger
+from src.agents.rag_agent import RAGAgent
+from src.utils.document_processor import DocumentProcessor
+from src.utils.conversation_summarizer import ConversationSummarizer
+from src.utils.logger import logger
+from src.utils.llm_utils import get_llm_instance, get_vector_store
+from src.db.mongodb_store import MongoDBStore
+from src.implementations.document_service import DocumentService
+from src.models import (
+    ChatRequest,
+    ChatResponse,
+    BatchUploadResponse,
+    SummarizeRequest,
+    SummaryResponse,
+    FeedbackRequest
+)
 from config.config import settings
 
 app = FastAPI(title="RAG Chatbot API")
@@ -35,140 +32,11 @@ doc_processor = DocumentProcessor(
     max_file_size=10 * 1024 * 1024
 )
 summarizer = ConversationSummarizer()
+document_service = DocumentService(doc_processor)
 
-# Pydantic models
-class ChatRequest(BaseModel):
-    query: str
-    llm_provider: str = 'openai'
-    max_context_docs: int = 3
-    temperature: float = 0.7
-    stream: bool = False
-    conversation_id: Optional[str] = None
-
-class ChatResponse(BaseModel):
-    response: str
-    context: Optional[List[str]] = None
-    sources: Optional[List[Dict[str, str]]] = None
-    conversation_id: str
-    timestamp: datetime
-    relevant_doc_scores: Optional[List[float]] = None
-
-class DocumentResponse(BaseModel):
-    message: str
-    document_id: str
-    status: str
-    document_info: Optional[dict] = None
-
-class BatchUploadResponse(BaseModel):
-    message: str
-    processed_files: List[DocumentResponse]
-    failed_files: List[dict]
-
-class SummarizeRequest(BaseModel):
-    conversation_id: str
-    include_metadata: bool = True
-
-class SummaryResponse(BaseModel):
-    summary: str
-    key_insights: Dict
-    metadata: Optional[Dict] = None
-
-class FeedbackRequest(BaseModel):
-    rating: int
-    feedback: Optional[str] = None
-
-# Database initialization
-async def init_db():
-    async with aiosqlite.connect('chat_history.db') as db:
-        await db.execute('''
-            CREATE TABLE IF NOT EXISTS chat_history (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
-                conversation_id TEXT,
-                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
-                query TEXT,
-                response TEXT,
-                context TEXT,
-                sources TEXT,
-                llm_provider TEXT,
-                feedback TEXT,
-                rating INTEGER
-            )
-        ''')
-        await db.commit()
-
-# Utility functions
-def get_llm_instance(provider: str):
-    """Get LLM instance based on provider"""
-    llm_map = {
-        'openai': lambda: OpenAILanguageModel(api_key=settings.OPENAI_API_KEY),
-        'ollama': lambda: OllamaLanguageModel(base_url=settings.OLLAMA_BASE_URL),
-        'bert': lambda: BERTLanguageModel(),
-        'falcon': lambda: FalconLanguageModel(),
-        'llama': lambda: LlamaLanguageModel(),
-    }
-
-    if provider not in llm_map:
-        raise ValueError(f"Unsupported LLM provider: {provider}")
-    return llm_map[provider]()
-
-async def get_vector_store():
-    """Initialize and return vector store with embedding model."""
-    try:
-        embedding = HuggingFaceEmbedding(model_name=settings.EMBEDDING_MODEL)
-        vector_store = ChromaVectorStore(
-            embedding_function=embedding.embed_documents,
-            persist_directory=settings.CHROMA_PATH
-        )
-        return vector_store, embedding
-    except Exception as e:
-        logger.error(f"Error initializing vector store: {str(e)}")
-        raise HTTPException(status_code=500, detail="Failed to initialize vector store")
-
-async def process_and_store_document(
-    file_path: Path,
-    vector_store: ChromaVectorStore,
-    document_id: str
-):
-    """Process document and store in vector database."""
-    try:
-        processed_doc = await doc_processor.process_document(file_path)
-
-        vector_store.add_documents(
-            documents=processed_doc['chunks'],
-            metadatas=[{
-                'document_id': document_id,
-                'chunk_id': i,
-                'source': str(file_path.name),
-                'metadata': processed_doc['metadata']
-            } for i in range(len(processed_doc['chunks']))],
-            ids=[f"{document_id}_chunk_{i}" for i in range(len(processed_doc['chunks']))]
-        )
-
-        return processed_doc
-    finally:
-        if file_path.exists():
-            file_path.unlink()
-
-async def store_chat_history(
-    conversation_id: str,
-    query: str,
-    response: str,
-    context: List[str],
-    sources: List[Dict],
-    llm_provider: str
-):
-    """Store chat history in database"""
-    async with aiosqlite.connect('chat_history.db') as db:
-        await db.execute(
-            '''INSERT INTO chat_history
-            (conversation_id, query, response, context, sources, llm_provider)
-            VALUES (?, ?, ?, ?, ?, ?)''',
-            (conversation_id, query, response, json.dumps(context),
-             json.dumps(sources), llm_provider)
-        )
-        await db.commit()
-
-# Endpoints
+# Initialize MongoDB
+mongodb = MongoDBStore(settings.MONGODB_URI)
+
 @app.post("/documents/upload", response_model=BatchUploadResponse)
 async def upload_documents(
     files: List[UploadFile] = File(...),
@@ -177,68 +45,17 @@ async def upload_documents(
     """Upload and process multiple documents"""
     try:
         vector_store, _ = await get_vector_store()
-        upload_dir = Path("temp_uploads")
-        upload_dir.mkdir(exist_ok=True)
-
-        processed_files = []
-        failed_files = []
-
-        for file in files:
-            try:
-                document_id = str(uuid.uuid4())
-
-                if not any(file.filename.lower().endswith(ext)
-                           for ext in doc_processor.supported_formats):
-                    failed_files.append({
-                        "filename": file.filename,
-                        "error": "Unsupported file format"
-                    })
-                    continue
-
-                temp_path = upload_dir / f"{document_id}_{file.filename}"
-                with open(temp_path, "wb") as buffer:
-                    shutil.copyfileobj(file.file, buffer)
-
-                background_tasks.add_task(
-                    process_and_store_document,
-                    temp_path,
-                    vector_store,
-                    document_id
-                )
-
-                processed_files.append(
-                    DocumentResponse(
-                        message="Document queued for processing",
-                        document_id=document_id,
-                        status="processing",
-                        document_info={
-                            "original_filename": file.filename,
-                            "size": os.path.getsize(temp_path),
-                            "content_type": file.content_type
-                        }
-                    )
-                )
-
-            except Exception as e:
-                logger.error(f"Error processing file {file.filename}: {str(e)}")
-                failed_files.append({
-                    "filename": file.filename,
-                    "error": str(e)
-                })
-
-        return BatchUploadResponse(
-            message=f"Processed {len(processed_files)} documents with {len(failed_files)} failures",
-            processed_files=processed_files,
-            failed_files=failed_files
+        response = await document_service.process_documents(
+            files,
+            vector_store,
+            background_tasks
         )
-
+        return response
     except Exception as e:
         logger.error(f"Error in document upload: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
-
     finally:
-        if upload_dir.exists() and not any(upload_dir.iterdir()):
-            upload_dir.rmdir()
+        document_service.cleanup()
 
 @app.post("/chat", response_model=ChatResponse)
 async def chat_endpoint(
@@ -269,14 +86,14 @@ async def chat_endpoint(
 
         conversation_id = request.conversation_id or str(uuid.uuid4())
 
-        background_tasks.add_task(
-            store_chat_history,
-            conversation_id,
-            request.query,
-            response.response,
-            response.context_docs,
-            response.sources,
-            request.llm_provider
+        # Store chat history in MongoDB
+        await mongodb.store_message(
+            conversation_id=conversation_id,
+            query=request.query,
+            response=response.response,
+            context=response.context_docs,
+            sources=response.sources,
+            llm_provider=request.llm_provider
         )
 
         return ChatResponse(
@@ -295,44 +112,25 @@
 @app.get("/chat/history/{conversation_id}")
 async def get_conversation_history(conversation_id: str):
     """Get complete conversation history"""
-    async with aiosqlite.connect('chat_history.db') as db:
-        db.row_factory = aiosqlite.Row
-        async with db.execute(
-            'SELECT * FROM chat_history WHERE conversation_id = ? ORDER BY timestamp',
-            (conversation_id,)
-        ) as cursor:
-            history = await cursor.fetchall()
-
-    if not history:
-        raise HTTPException(status_code=404, detail="Conversation not found")
-
-    return {
-        "conversation_id": conversation_id,
-        "messages": [dict(row) for row in history]
-    }
+    history = await mongodb.get_conversation_history(conversation_id)
+
+    if not history:
+        raise HTTPException(status_code=404, detail="Conversation not found")
+
+    return {
+        "conversation_id": conversation_id,
+        "messages": history
+    }
 
 @app.post("/chat/summarize", response_model=SummaryResponse)
 async def summarize_conversation(request: SummarizeRequest):
     """Generate a summary of a conversation"""
     try:
-        async with aiosqlite.connect('chat_history.db') as db:
-            db.row_factory = aiosqlite.Row
-            async with db.execute(
-                'SELECT * FROM chat_history WHERE conversation_id = ? ORDER BY timestamp',
-                (request.conversation_id,)
-            ) as cursor:
-                history = await cursor.fetchall()
+        messages = await mongodb.get_messages_for_summary(request.conversation_id)
 
-        if not history:
+        if not messages:
             raise HTTPException(status_code=404, detail="Conversation not found")
 
-        messages = [{
-            'role': 'user' if msg['query'] else 'assistant',
-            'content': msg['query'] or msg['response'],
-            'timestamp': msg['timestamp'],
-            'sources': json.loads(msg['sources']) if msg['sources'] else None
-        } for msg in history]
-
         summary = await summarizer.summarize_conversation(
             messages,
             include_metadata=request.include_metadata
@@ -351,14 +149,14 @@ async def submit_feedback(
 ):
     """Submit feedback for a conversation"""
     try:
-        async with aiosqlite.connect('chat_history.db') as db:
-            await db.execute(
-                '''UPDATE chat_history
-                SET feedback = ?, rating = ?
-                WHERE conversation_id = ?''',
-                (feedback_request.feedback, feedback_request.rating, conversation_id)
-            )
-            await db.commit()
+        success = await mongodb.update_feedback(
+            conversation_id=conversation_id,
+            feedback=feedback_request.feedback,
+            rating=feedback_request.rating
+        )
+
+        if not success:
+            raise HTTPException(status_code=404, detail="Conversation not found")
 
         return {"status": "Feedback submitted successfully"}
 
@@ -371,11 +169,6 @@
     """Health check endpoint"""
    return {"status": "healthy"}
 
-# Startup event
-@app.on_event("startup")
-async def startup_event():
-    await init_db()
-
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)
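With the refactor in place, the endpoints can be exercised end to end; a hedged client sketch (assumes the server above is running and the chosen LLM provider is configured):

    import httpx

    base = "http://localhost:8000"  # matches the uvicorn.run() call above

    # POST /chat; a new conversation_id is generated when none is supplied
    chat = httpx.post(f"{base}/chat", json={
        "query": "What do the uploaded documents say?",
        "llm_provider": "openai"
    }, timeout=120).json()
    conv_id = chat["conversation_id"]

    # Both reads below are now served from MongoDB instead of SQLite
    history = httpx.get(f"{base}/chat/history/{conv_id}").json()
    summary = httpx.post(f"{base}/chat/summarize", json={
        "conversation_id": conv_id,
        "include_metadata": True
    }, timeout=120).json()
    print(summary["summary"])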
src/models/__init__.py ADDED
@@ -0,0 +1,26 @@
+# src/models/__init__.py
+from .chat import (
+    ChatRequest,
+    ChatResponse,
+    FeedbackRequest,
+    SummarizeRequest,
+    SummaryResponse
+)
+from .document import (
+    DocumentResponse,
+    BatchUploadResponse,
+    DocumentInfo
+)
+from .base import ChatMetadata
+
+__all__ = [
+    'ChatRequest',
+    'ChatResponse',
+    'FeedbackRequest',
+    'SummarizeRequest',
+    'SummaryResponse',
+    'DocumentResponse',
+    'BatchUploadResponse',
+    'DocumentInfo',
+    'ChatMetadata'
+]
src/models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (589 Bytes).
 
src/models/__pycache__/base.cpython-312.pyc ADDED
Binary file (689 Bytes).
 
src/models/__pycache__/chat.cpython-312.pyc ADDED
Binary file (2.36 kB).
 
src/models/__pycache__/document.cpython-312.pyc ADDED
Binary file (1.37 kB).
 
src/models/base.py ADDED
@@ -0,0 +1,9 @@
+# src/models/base.py
+from pydantic import BaseModel
+from datetime import datetime
+from typing import Optional, List, Dict
+
+class ChatMetadata(BaseModel):
+    """Base metadata model for chat-related responses"""
+    conversation_id: str
+    timestamp: datetime
src/models/chat.py ADDED
@@ -0,0 +1,37 @@
+# src/models/chat.py
+from pydantic import BaseModel
+from typing import Optional, List, Dict
+from datetime import datetime
+from .base import ChatMetadata
+
+class ChatRequest(BaseModel):
+    """Request model for chat endpoint"""
+    query: str
+    llm_provider: str = 'openai'
+    max_context_docs: int = 3
+    temperature: float = 0.7
+    stream: bool = False
+    conversation_id: Optional[str] = None
+
+class ChatResponse(ChatMetadata):
+    """Response model for chat endpoint"""
+    response: str
+    context: Optional[List[str]] = None
+    sources: Optional[List[Dict[str, str]]] = None
+    relevant_doc_scores: Optional[List[float]] = None
+
+class FeedbackRequest(BaseModel):
+    """Request model for feedback endpoint"""
+    rating: int
+    feedback: Optional[str] = None
+
+class SummarizeRequest(BaseModel):
+    """Request model for summarize endpoint"""
+    conversation_id: str
+    include_metadata: bool = True
+
+class SummaryResponse(BaseModel):
+    """Response model for summarize endpoint"""
+    summary: str
+    key_insights: Dict
+    metadata: Optional[Dict] = None
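Note that ChatResponse now inherits conversation_id and timestamp from ChatMetadata instead of declaring them itself. A construction sketch with placeholder values:

    from datetime import datetime
    from src.models import ChatResponse

    resp = ChatResponse(
        response="The report covers Q3 results.",
        context=["retrieved chunk ..."],
        sources=[{"source": "report.pdf"}],
        relevant_doc_scores=[0.87],
        conversation_id="demo-conversation",  # field inherited from ChatMetadata
        timestamp=datetime.now()              # field inherited from ChatMetadata
    )
    print(resp.model_dump())  # pydantic v2; use resp.dict() on pydantic v1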
src/models/document.py ADDED
@@ -0,0 +1,22 @@
+# src/models/document.py
+from pydantic import BaseModel
+from typing import Optional, List
+
+class DocumentInfo(BaseModel):
+    """Document information model"""
+    original_filename: str
+    size: int
+    content_type: str
+
+class DocumentResponse(BaseModel):
+    """Response model for document processing"""
+    message: str
+    document_id: str
+    status: str
+    document_info: Optional[DocumentInfo] = None
+
+class BatchUploadResponse(BaseModel):
+    """Response model for batch document upload"""
+    message: str
+    processed_files: List[DocumentResponse]
+    failed_files: List[dict]
src/utils/__pycache__/conversation_summarizer.cpython-312.pyc CHANGED
Binary files a/src/utils/__pycache__/conversation_summarizer.cpython-312.pyc and b/src/utils/__pycache__/conversation_summarizer.cpython-312.pyc differ
 
src/utils/__pycache__/document_processor.cpython-312.pyc CHANGED
Binary files a/src/utils/__pycache__/document_processor.cpython-312.pyc and b/src/utils/__pycache__/document_processor.cpython-312.pyc differ
 
src/utils/__pycache__/llm_utils.cpython-312.pyc ADDED
Binary file (3.44 kB).
 
src/utils/llm_utils.py ADDED
@@ -0,0 +1,59 @@
+# src/utils/llm_utils.py
+from fastapi import HTTPException
+from typing import Tuple
+
+from src.llms.openai_llm import OpenAILanguageModel
+from src.llms.ollama_llm import OllamaLanguageModel
+from src.llms.bert_llm import BERTLanguageModel
+from src.llms.falcon_llm import FalconLanguageModel
+from src.llms.llama_llm import LlamaLanguageModel
+from src.embeddings.huggingface_embedding import HuggingFaceEmbedding
+from src.vectorstores.chroma_vectorstore import ChromaVectorStore
+from src.utils.logger import logger
+from config.config import settings
+
+def get_llm_instance(provider: str):
+    """
+    Get LLM instance based on provider
+
+    Args:
+        provider (str): Name of the LLM provider
+
+    Returns:
+        BaseLLM: Instance of the LLM
+
+    Raises:
+        ValueError: If provider is not supported
+    """
+    llm_map = {
+        'openai': lambda: OpenAILanguageModel(api_key=settings.OPENAI_API_KEY),
+        'ollama': lambda: OllamaLanguageModel(base_url=settings.OLLAMA_BASE_URL),
+        'bert': lambda: BERTLanguageModel(),
+        'falcon': lambda: FalconLanguageModel(),
+        'llama': lambda: LlamaLanguageModel(),
+    }
+
+    if provider not in llm_map:
+        raise ValueError(f"Unsupported LLM provider: {provider}")
+    return llm_map[provider]()
+
+async def get_vector_store() -> Tuple[ChromaVectorStore, HuggingFaceEmbedding]:
+    """
+    Initialize and return vector store with embedding model.
+
+    Returns:
+        Tuple[ChromaVectorStore, HuggingFaceEmbedding]: Initialized vector store and embedding model
+
+    Raises:
+        HTTPException: If vector store initialization fails
+    """
+    try:
+        embedding = HuggingFaceEmbedding(model_name=settings.EMBEDDING_MODEL)
+        vector_store = ChromaVectorStore(
+            embedding_function=embedding.embed_documents,
+            persist_directory=settings.CHROMA_PATH
+        )
+        return vector_store, embedding
+    except Exception as e:
+        logger.error(f"Error initializing vector store: {str(e)}")
+        raise HTTPException(status_code=500, detail="Failed to initialize vector store")
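A small sketch of the extracted helpers, mirroring how src/main.py uses them (the 'openai' path assumes OPENAI_API_KEY is configured in settings):

    import asyncio
    from src.utils.llm_utils import get_llm_instance, get_vector_store

    async def demo():
        # Returns the Chroma store together with the embedding model it was built with
        vector_store, embedding = await get_vector_store()

        llm = get_llm_instance("openai")
        try:
            get_llm_instance("unknown-provider")
        except ValueError as e:
            print(e)  # Unsupported LLM provider: unknown-provider

    asyncio.run(demo())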
src/vectorstores/__pycache__/__init__.cpython-312.pyc CHANGED
Binary files a/src/vectorstores/__pycache__/__init__.cpython-312.pyc and b/src/vectorstores/__pycache__/__init__.cpython-312.pyc differ
 
src/vectorstores/__pycache__/base_vectorstore.cpython-312.pyc CHANGED
Binary files a/src/vectorstores/__pycache__/base_vectorstore.cpython-312.pyc and b/src/vectorstores/__pycache__/base_vectorstore.cpython-312.pyc differ
 
src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc CHANGED
Binary files a/src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc and b/src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc differ