dsmultimedika commited on
Commit
0767396
·
1 Parent(s): d879d77

fix : update code

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.gitignore CHANGED
@@ -398,4 +398,14 @@ FodyWeavers.xsd
398
  *.sln.iml
399
 
400
  .env
401
- *.pem
 
 
 
 
 
 
 
 
 
 
 
398
  *.sln.iml
399
 
400
  .env
401
+
402
+ *.pem
403
+ *.ipynb
404
+ *.json
405
+
406
+ # Ignore directories and specific folders
407
+ /research/
408
+
409
+ # Auto Generated PWA files
410
+ **/public/sw.js
411
+ **/public/workbox-*.js
api/events.py CHANGED
@@ -7,8 +7,6 @@ from db.database import engine
7
  from db.models import Base
8
  from llama_index.core import set_global_handler
9
 
10
-
11
-
12
  load_dotenv()
13
 
14
 
@@ -23,6 +21,7 @@ async def startup() -> None:
23
  async def shutdown() -> None:
24
  pass
25
 
 
26
  def register_events(app: FastAPI) -> FastAPI:
27
  app.add_event_handler("startup", startup)
28
  app.add_event_handler("shutdown", shutdown)
 
7
  from db.models import Base
8
  from llama_index.core import set_global_handler
9
 
 
 
10
  load_dotenv()
11
 
12
 
 
21
  async def shutdown() -> None:
22
  pass
23
 
24
+
25
  def register_events(app: FastAPI) -> FastAPI:
26
  app.add_event_handler("startup", startup)
27
  app.add_event_handler("shutdown", shutdown)
api/function.py CHANGED
@@ -13,7 +13,7 @@ from script.document_uploader import Uploader
13
  from script.vector_db import IndexManager
14
  from service.aws_loader import Loader
15
  from service.dto import BotResponseStreaming
16
-
17
 
18
  load_dotenv()
19
 
@@ -21,7 +21,7 @@ load_dotenv()
21
  logging.basicConfig(level=logging.INFO)
22
 
23
 
24
- async def data_ingestion(reference, file: UploadFile) -> Any:
25
  try:
26
  # Assuming you have a Langfuse callback handler
27
  langfuse_callback_handler = LlamaIndexCallbackHandler()
@@ -29,33 +29,31 @@ async def data_ingestion(reference, file: UploadFile) -> Any:
29
  user_id="admin_book_uploaded",
30
  )
31
 
32
- uploader = Uploader(reference, file)
33
  nodes_with_metadata, file_stream = await uploader.process_documents()
 
 
 
34
 
35
  # Build indexes using IndexManager
36
  index = IndexManager()
37
  index.build_indexes(nodes_with_metadata)
38
-
39
 
40
- # # Upload to AWS
41
  file_name = f"{reference['title']}"
42
  aws_loader = Loader()
43
 
44
- # file_obj = file
45
  aws_loader.upload_to_s3(file_stream, file_name)
46
 
47
-
48
  return json.dumps(
49
  {"status": "success", "message": "Vector Index loaded successfully."}
50
  )
51
 
52
  except Exception as e:
53
- # Log the error and raise HTTPException for FastAPI
54
  logging.error("An error occurred in data ingestion: %s", e)
55
- return JSONResponse(
56
- status_code=500,
57
- content="An internal server error occurred in data ingestion.",
58
- )
59
 
60
  async def generate_streaming_completion(user_request, session_id):
61
  try:
@@ -64,7 +62,6 @@ async def generate_streaming_completion(user_request, session_id):
64
 
65
  # Load existing indexes
66
  index = index_manager.load_existing_indexes()
67
-
68
  # Retrieve the chat engine with the loaded index
69
  chat_engine = engine.get_chat_engine(index, session_id)
70
  # Generate completion response
 
13
  from script.vector_db import IndexManager
14
  from service.aws_loader import Loader
15
  from service.dto import BotResponseStreaming
16
+ from utils.error_handlers import handle_exception
17
 
18
  load_dotenv()
19
 
 
21
  logging.basicConfig(level=logging.INFO)
22
 
23
 
24
+ async def data_ingestion(reference, file: UploadFile, lang: str = "en") -> Any:
25
  try:
26
  # Assuming you have a Langfuse callback handler
27
  langfuse_callback_handler = LlamaIndexCallbackHandler()
 
29
  user_id="admin_book_uploaded",
30
  )
31
 
32
+ uploader = Uploader(reference, file, lang)
33
  nodes_with_metadata, file_stream = await uploader.process_documents()
34
+
35
+ if isinstance(nodes_with_metadata, JSONResponse):
36
+ return nodes_with_metadata # Return the error response directly
37
 
38
  # Build indexes using IndexManager
39
  index = IndexManager()
40
  index.build_indexes(nodes_with_metadata)
 
41
 
42
+ # Upload AWS
43
  file_name = f"{reference['title']}"
44
  aws_loader = Loader()
45
 
 
46
  aws_loader.upload_to_s3(file_stream, file_name)
47
 
 
48
  return json.dumps(
49
  {"status": "success", "message": "Vector Index loaded successfully."}
50
  )
51
 
52
  except Exception as e:
53
+ # Log the error
54
  logging.error("An error occurred in data ingestion: %s", e)
55
+ # Use handle_exception for structured error handling
56
+ return handle_exception(e)
 
 
57
 
58
  async def generate_streaming_completion(user_request, session_id):
59
  try:
 
62
 
63
  # Load existing indexes
64
  index = index_manager.load_existing_indexes()
 
65
  # Retrieve the chat engine with the loaded index
66
  chat_engine = engine.get_chat_engine(index, session_id)
67
  # Generate completion response
api/router/book.py CHANGED
@@ -18,6 +18,7 @@ from config import MYSQL_CONFIG
18
  from utils.error_handlers import handle_exception
19
  from script.vector_db import IndexManager
20
  from service.dto import MetadataResponse
 
21
  from sqlalchemy.orm import Session
22
  from sqlalchemy.future import select
23
 
@@ -37,9 +38,7 @@ async def get_metadata(user: user_dependency, db: db_dependency):
37
  try:
38
  # Join Metadata with Category to get the category name
39
  fetching = DataFetching(user, db)
40
- # print(fetching)
41
  metadata_fetching = fetching.metadata_fetching()
42
- # print(metadata_fetching)
43
 
44
  # Transform results into MetadataResponse model with optional thumbnail handling
45
  return metadata_fetching
@@ -58,13 +57,15 @@ async def upload_file(
58
  year: int = Form(...),
59
  publisher: str = Form(...),
60
  file: UploadFile = File(...),
 
61
  thumbnail: Optional[UploadFile] = File(None),
62
  ):
63
  auth_response = check_admin_authentication(user)
64
  if auth_response:
65
  return auth_response
66
-
67
 
 
 
68
 
69
  # Query the category based on category_id
70
  category_query = CategoryQuery(user)
@@ -80,28 +81,32 @@ async def upload_file(
80
  }
81
 
82
  # Process the file and handle data ingestion
83
- response = await data_ingestion(reference, file)
 
 
 
84
 
 
 
 
 
 
 
 
85
  # Create a new Metadata object
86
  book_query = BookQuery(user)
87
  book_query.add_book(db, title, author, category_id, year, publisher)
88
  logging.info("Database Inserted")
89
 
90
  return {
91
- "status": "success",
92
  "filename": file.filename,
93
  "response": response,
94
  "info": "upload file successfully",
95
  }
96
 
97
  except Exception as e:
98
- return {
99
- "status": "error",
100
- "filename": "",
101
- "response": "",
102
- "info": "upload file failed",
103
- "error_message":handle_exception(e)
104
- }
105
 
106
  @router.put("/book/{metadata_id}")
107
  async def update_metadata(
@@ -153,6 +158,7 @@ async def update_metadata(
153
 
154
  # Update existing metadata entry
155
  metadata = db.query(Metadata).filter(Metadata.id == metadata_id).first()
 
156
 
157
  if not metadata:
158
  return JSONResponse(status_code=404, content="Metadata not found")
@@ -160,10 +166,11 @@ async def update_metadata(
160
  updated_metadata = book_query.update_metadata_entry(
161
  db, metadata_id, title, author, category_id, year, publisher
162
  )
 
163
  updated_category = category_query.get_category(db, updated_metadata.category_id)
 
164
 
165
  return MetadataResponse(
166
- status="success",
167
  id=metadata_id,
168
  title=updated_metadata.title,
169
  author=updated_metadata.author,
@@ -179,10 +186,7 @@ async def update_metadata(
179
  )
180
 
181
  except Exception as e:
182
- return {
183
- "status":"error",
184
- "error_message":handle_exception(e)
185
- }
186
 
187
  @router.delete("/book/{metadata_id}")
188
  async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id: int):
@@ -203,12 +207,8 @@ async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id:
203
  db.delete(metadata)
204
  db.commit()
205
 
206
- return {"status": "delete successfully"}
207
 
208
  except Exception as e:
209
- return {
210
- "status": "error",
211
- "message": "delete failed",
212
- "error_message": handle_exception(e)
213
- }
214
 
 
18
  from utils.error_handlers import handle_exception
19
  from script.vector_db import IndexManager
20
  from service.dto import MetadataResponse
21
+ from service.aws_loader import Loader
22
  from sqlalchemy.orm import Session
23
  from sqlalchemy.future import select
24
 
 
38
  try:
39
  # Join Metadata with Category to get the category name
40
  fetching = DataFetching(user, db)
 
41
  metadata_fetching = fetching.metadata_fetching()
 
42
 
43
  # Transform results into MetadataResponse model with optional thumbnail handling
44
  return metadata_fetching
 
57
  year: int = Form(...),
58
  publisher: str = Form(...),
59
  file: UploadFile = File(...),
60
+ lang: str = Form(None),
61
  thumbnail: Optional[UploadFile] = File(None),
62
  ):
63
  auth_response = check_admin_authentication(user)
64
  if auth_response:
65
  return auth_response
 
66
 
67
+ # Restrict `lang` to only "id" or "en"
68
+ lang = lang if lang in {"id", "en"} else "en"
69
 
70
  # Query the category based on category_id
71
  category_query = CategoryQuery(user)
 
81
  }
82
 
83
  # Process the file and handle data ingestion
84
+ response = await data_ingestion(reference, file, lang)
85
+
86
+ if isinstance(response, JSONResponse):
87
+ return response # Return the error response directly
88
 
89
+ if thumbnail:
90
+ file_name = f"{reference['title']}"
91
+ aws_loader = Loader()
92
+ ekstensi_file = file.filename.split(".")[-1].lower()
93
+ aws_loader.upload_image_to_s3(file=thumbnail, custom_name=f"{file_name}.{ekstensi_file}")
94
+
95
+
96
  # Create a new Metadata object
97
  book_query = BookQuery(user)
98
  book_query.add_book(db, title, author, category_id, year, publisher)
99
  logging.info("Database Inserted")
100
 
101
  return {
 
102
  "filename": file.filename,
103
  "response": response,
104
  "info": "upload file successfully",
105
  }
106
 
107
  except Exception as e:
108
+ return handle_exception(e)
109
+
 
 
 
 
 
110
 
111
  @router.put("/book/{metadata_id}")
112
  async def update_metadata(
 
158
 
159
  # Update existing metadata entry
160
  metadata = db.query(Metadata).filter(Metadata.id == metadata_id).first()
161
+ print(metadata)
162
 
163
  if not metadata:
164
  return JSONResponse(status_code=404, content="Metadata not found")
 
166
  updated_metadata = book_query.update_metadata_entry(
167
  db, metadata_id, title, author, category_id, year, publisher
168
  )
169
+ print(updated_metadata)
170
  updated_category = category_query.get_category(db, updated_metadata.category_id)
171
+ print(updated_category)
172
 
173
  return MetadataResponse(
 
174
  id=metadata_id,
175
  title=updated_metadata.title,
176
  author=updated_metadata.author,
 
186
  )
187
 
188
  except Exception as e:
189
+ return handle_exception(e)
 
 
 
190
 
191
  @router.delete("/book/{metadata_id}")
192
  async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id: int):
 
207
  db.delete(metadata)
208
  db.commit()
209
 
210
+ return {"Status": "delete successfully"}
211
 
212
  except Exception as e:
213
+ return handle_exception(e)
 
 
 
 
214
 
api/router/book_collection.py CHANGED
@@ -30,11 +30,7 @@ async def get_book_collection(user: user_dependency, db: db_dependency):
30
  "book_collection": book_collection,
31
  }
32
  except Exception as e:
33
- return {
34
- "status": "error",
35
- "book_collection": [],
36
- "error_message": handle_exception(e)
37
- }
38
 
39
 
40
  @router.post("/book_collection")
@@ -52,12 +48,7 @@ async def request_book_collection(
52
  return user_meta_query.insert_user_meta_entries(db, metadata_ids)
53
 
54
  except Exception as e:
55
- return {
56
- "status": "error",
57
- "message": "User meta entries failed to added.",
58
- "metadata_ids": [], # Include the metadata IDs in the result
59
- "error_message":handle_exception(e)
60
- }
61
 
62
 
63
  @router.put("/book_collection")
@@ -74,13 +65,7 @@ async def update_book_collection(
74
  return user_meta_query.update_user_meta_entries(db, metadata_ids)
75
 
76
  except Exception as e:
77
- return {
78
- "status": "error",
79
- "added_meta": [],
80
- "deleted_meta": [],
81
- "kept_meta": [],
82
- "message":handle_exception(e)
83
- }
84
 
85
 
86
  @router.delete("/book_collection/{metadata_id}")
@@ -98,11 +83,7 @@ async def delete_book_collection(
98
  db, metadata_id=metadata_id
99
  )
100
  except Exception as e:
101
- return {
102
- "status": "error",
103
- "message": f"Book user with id {metadata_id} deleted successfully.",
104
- "error_message": handle_exception(e)
105
- }
106
 
107
 
108
  @router.delete("/all_collections")
@@ -121,8 +102,4 @@ async def delete_all_book(user: user_dependency, db: db_dependency):
121
  "message": f"Deleted book collection for user {user.get('id')}",
122
  }
123
  except Exception as e:
124
- return {
125
- "status": "error",
126
- "message": f"Delete failed for user {user.get('id')}",
127
- "error_message": handle_exception(e)
128
- }
 
30
  "book_collection": book_collection,
31
  }
32
  except Exception as e:
33
+ return handle_exception(e)
 
 
 
 
34
 
35
 
36
  @router.post("/book_collection")
 
48
  return user_meta_query.insert_user_meta_entries(db, metadata_ids)
49
 
50
  except Exception as e:
51
+ return handle_exception(e)
 
 
 
 
 
52
 
53
 
54
  @router.put("/book_collection")
 
65
  return user_meta_query.update_user_meta_entries(db, metadata_ids)
66
 
67
  except Exception as e:
68
+ return handle_exception(e)
 
 
 
 
 
 
69
 
70
 
71
  @router.delete("/book_collection/{metadata_id}")
 
83
  db, metadata_id=metadata_id
84
  )
85
  except Exception as e:
86
+ return handle_exception(e)
 
 
 
 
87
 
88
 
89
  @router.delete("/all_collections")
 
102
  "message": f"Deleted book collection for user {user.get('id')}",
103
  }
104
  except Exception as e:
105
+ return handle_exception(e)
 
 
 
 
api/router/bot.py DELETED
@@ -1,92 +0,0 @@
1
- from fastapi import APIRouter, HTTPException, Depends
2
- from service.dto import UserPromptRequest, BotResponse
3
- from core.chat.chatstore import ChatStore
4
-
5
- from api.function import (
6
- generate_streaming_completion,
7
- generate_completion_non_streaming,
8
- )
9
- from sse_starlette.sse import EventSourceResponse
10
- from utils.utils import generate_uuid
11
-
12
- router = APIRouter(tags=["Bot"])
13
-
14
- def get_chat_store():
15
- return ChatStore()
16
-
17
- @router.post("/bot/new")
18
- async def create_new_session():
19
- session_id = generate_uuid()
20
- return {"session_id" : session_id}
21
-
22
- @router.get("/bot/{session_id}")
23
- async def get_session_id(session_id: str, chat_store: ChatStore = Depends(get_chat_store)):
24
- chat_history = chat_store.get_messages(session_id)
25
-
26
- if not chat_history:
27
- raise HTTPException(status_code=404, detail="Session not found or empty.")
28
-
29
- return chat_history
30
-
31
- @router.get("/bot")
32
- async def get_all_session_ids():
33
- try:
34
- chat_store = ChatStore()
35
- all_keys = chat_store.get_keys()
36
- print(all_keys)
37
- return all_keys
38
- except Exception as e:
39
- # Log the error and raise HTTPException for FastAPI
40
- print(f"An error occurred in update data.: {e}")
41
- raise HTTPException(
42
- status_code=400, detail="the error when get all session ids"
43
- )
44
-
45
-
46
- @router.post("/bot/{session_id}")
47
- async def bot_generator_general(user_prompt_request: UserPromptRequest):
48
-
49
- if user_prompt_request.streaming:
50
- return EventSourceResponse(
51
- generate_streaming_completion(
52
- user_prompt_request.prompt, user_prompt_request.streaming
53
- )
54
- )
55
- else:
56
- response, raw_references, references, metadata, scores = (
57
- generate_completion_non_streaming(
58
- user_prompt_request.session_id, user_prompt_request.prompt, user_prompt_request.streaming
59
- )
60
- )
61
-
62
- return BotResponse(
63
- content=response,
64
- raw_references=raw_references,
65
- references=references,
66
- metadata=metadata,
67
- scores=scores,
68
- )
69
-
70
-
71
- @router.post("/bot/{category_id}/{title}") #Ganti router
72
- async def bot_generator_spesific(
73
- category_id: int, title: str, user_prompt_request: UserPromptRequest
74
- ):
75
- pass
76
-
77
- @router.delete("/bot/{session_id}")
78
- async def delete_bot(session_id: str, chat_store: ChatStore = Depends(get_chat_store)):
79
- try:
80
- chat_store.delete_messages(session_id)
81
- return {"info": f"Delete {session_id} successful"}
82
- except Exception as e:
83
- # Log the error and raise HTTPException for FastAPI
84
- print(f"An error occurred in update data.: {e}")
85
- raise HTTPException(
86
- status_code=400, detail="the error when deleting message"
87
- )
88
-
89
-
90
- @router.get("/bot/{category_id}/{title}")
91
- async def get_favourite_data(category_id: int, title: str, human_template):
92
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/router/bot_general.py CHANGED
@@ -27,7 +27,10 @@ def get_chat_store():
27
 
28
 
29
  @router.post("/bot_general/new")
30
- async def create_session_general():
 
 
 
31
  session_id = generate_uuid()
32
  return {"session_id": session_id}
33
 
@@ -54,9 +57,12 @@ async def get_session_id(
54
 
55
  @router.post("/bot/{session_id}")
56
  async def bot_generator_general(
57
- session_id: str, user_prompt_request: UserPromptRequest
58
  ):
59
-
 
 
 
60
  langfuse_callback_handler = LlamaIndexCallbackHandler()
61
  langfuse_callback_handler.set_trace_params(user_id="guest", session_id=session_id)
62
 
@@ -77,8 +83,12 @@ async def bot_generator_general(
77
 
78
  @router.delete("/bot/{session_id}")
79
  async def delete_bot(
80
- db: db_dependency, session_id: str, chat_store: ChatStore = Depends(get_chat_store)
81
  ):
 
 
 
 
82
  try:
83
  chat_store.delete_messages(session_id)
84
  # Delete session from database
 
27
 
28
 
29
  @router.post("/bot_general/new")
30
+ async def create_session_general(user: user_dependency):
31
+ auth_response = check_user_authentication(user)
32
+ if auth_response:
33
+ return auth_response
34
  session_id = generate_uuid()
35
  return {"session_id": session_id}
36
 
 
57
 
58
  @router.post("/bot/{session_id}")
59
  async def bot_generator_general(
60
+ user: user_dependency,session_id: str, user_prompt_request: UserPromptRequest
61
  ):
62
+ auth_response = check_user_authentication(user)
63
+ if auth_response:
64
+ return auth_response
65
+
66
  langfuse_callback_handler = LlamaIndexCallbackHandler()
67
  langfuse_callback_handler.set_trace_params(user_id="guest", session_id=session_id)
68
 
 
83
 
84
  @router.delete("/bot/{session_id}")
85
  async def delete_bot(
86
+ user: user_dependency,db: db_dependency, session_id: str, chat_store: ChatStore = Depends(get_chat_store)
87
  ):
88
+ auth_response = check_user_authentication(user)
89
+ if auth_response:
90
+ return auth_response
91
+
92
  try:
93
  chat_store.delete_messages(session_id)
94
  # Delete session from database
api/router/bot_one.py CHANGED
@@ -6,7 +6,8 @@ import pytz
6
  from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
7
  from datetime import datetime
8
  from core.chat.chatstore import ChatStore
9
- from core.chat.bot_service import ChatCompletionService
 
10
  from db.database import get_db
11
  from db.models import Session_Publisher
12
  from db.query.query_book import BookQuery
@@ -139,7 +140,7 @@ async def get_all_session_bot_one(
139
  "id": session.id,
140
  "bot_name": session.bot_name,
141
  "updated_at": str(session.updated_at),
142
- "last_message": chat_store.get_last_message_mongodb(session.id),
143
  }
144
  for session in sessions
145
  ]
 
6
  from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
7
  from datetime import datetime
8
  from core.chat.chatstore import ChatStore
9
+ # from core.chat.bot_service import ChatCompletionService
10
+ from core.chat.bot_service_multimodal import ChatCompletionService
11
  from db.database import get_db
12
  from db.models import Session_Publisher
13
  from db.query.query_book import BookQuery
 
140
  "id": session.id,
141
  "bot_name": session.bot_name,
142
  "updated_at": str(session.updated_at),
143
+ "last_message": chat_store.get_last_message_mongodb(session.id)
144
  }
145
  for session in sessions
146
  ]
api/router/bot_specific.py CHANGED
@@ -10,7 +10,8 @@ from sqlalchemy.exc import SQLAlchemyError, NoResultFound
10
 
11
  from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
12
  from core.chat.chatstore import ChatStore
13
- from core.chat.bot_service import ChatCompletionService
 
14
  from db.database import get_db
15
  from db.models import Bot_Meta, Bot, Metadata
16
  from db.models import Session as SessionModel
 
10
 
11
  from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
12
  from core.chat.chatstore import ChatStore
13
+ # from core.chat.bot_service import ChatCompletionService
14
+ from core.chat.bot_service_multimodal import ChatCompletionService
15
  from db.database import get_db
16
  from db.models import Bot_Meta, Bot, Metadata
17
  from db.models import Session as SessionModel
api/router/category.py CHANGED
@@ -85,8 +85,9 @@ async def create_category(user: user_dependency, db: db_dependency, category: Ca
85
  # Check if category already exists
86
  category_query = CategoryQuery(user)
87
  existing_category = category_query.get_existing_category(db, category.category_name)
88
- if not isinstance(existing_category,JSONResponse):
89
- return JSONResponse(status_code=400, content="Category already exists")
 
90
 
91
  # Add category
92
  category_query.add_category(db, category.category_name)
 
85
  # Check if category already exists
86
  category_query = CategoryQuery(user)
87
  existing_category = category_query.get_existing_category(db, category.category_name)
88
+ print(existing_category)
89
+ if existing_category: # Check if the category already exists
90
+ return JSONResponse(status_code=400, content={"error": "Category already exists"})
91
 
92
  # Add category
93
  category_query.add_category(db, category.category_name)
api/router/testing.py CHANGED
@@ -1,5 +1,4 @@
1
- from fastapi import FastAPI, HTTPException, Depends, Form
2
- from fastapi.security import OAuth2PasswordBearer
3
  import httpx
4
  import os
5
  from dotenv import load_dotenv
@@ -9,63 +8,88 @@ load_dotenv()
9
 
10
  app = FastAPI()
11
 
12
- # Bearer token for API authentication
13
  BEARER_TOKEN = os.getenv("MEDUCINE_API_BEARER_TOKEN")
14
-
15
- # Base URL for the Meducine API
16
  BASE_URL = os.getenv("BASE_URL")
17
 
18
- # OAuth2PasswordBearer provides the token as a dependency
19
- oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/login")
20
-
21
- @app.post("/login")
22
  async def login(email: str = Form(...), password: str = Form(...)):
23
- async with httpx.AsyncClient() as client:
24
- try:
25
- response = await client.post(
26
- f"{BASE_URL}/actions/meducine-restapi/auth/login",
27
- data={"email": email, "password": password},
28
- headers={"Authorization": f"Bearer {BEARER_TOKEN}"}
29
- )
30
- response.raise_for_status() # Raise an error for bad responses (4xx or 5xx)
31
- return handle_response(response) # Assuming this function formats the response correctly
32
- except httpx.HTTPStatusError as e:
33
- raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
34
- except Exception as e:
35
- raise HTTPException(status_code=500, detail=str(e))
36
-
37
- @app.post("/actions/meducine-restapi/auth/logout")
38
- async def logout(email: str = Form(...), password: str = Form(...)):
39
- async with httpx.AsyncClient() as client:
40
- response = await client.post(
41
- f"{BASE_URL}/actions/meducine-restapi/auth/logout",
42
  data={"email": email, "password": password},
43
- headers={"Authorization": f"Bearer {BEARER_TOKEN}"}
44
  )
45
- return handle_response(response)
 
 
46
 
47
- @app.get("/actions/meducine-restapi/auth/identity")
48
- async def get_identity(token: str = Depends(oauth2_scheme)):
49
- async with httpx.AsyncClient() as client:
50
- response = await client.get(
51
- f"{BASE_URL}/actions/meducine-restapi/auth/identity",
52
- headers={"Authorization": f"Bearer {token}"}
 
 
 
 
 
 
53
  )
54
- return handle_response(response)
 
 
55
 
56
- @app.get("/actions/meducine-restapi/user/has-premium-access")
57
- async def check_premium_access(feature: str, token: str = Depends(oauth2_scheme)):
58
- async with httpx.AsyncClient() as client:
59
- response = await client.get(
60
- f"{BASE_URL}/actions/meducine-restapi/user/has-premium-access",
 
 
 
 
 
61
  params={"feature": feature},
62
- headers={"Authorization": f"Bearer {token}"}
63
  )
64
- return handle_response(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  def handle_response(response: httpx.Response):
67
  """
68
- Handles the response from the Meducine API, returning appropriate responses based on status codes.
69
  """
70
  if response.status_code in range(200, 300):
71
  return response.json() # Successful request
@@ -76,6 +100,8 @@ def handle_response(response: httpx.Response):
76
  else:
77
  raise HTTPException(status_code=500, detail="Unexpected error")
78
 
 
 
79
  # Run the application
80
  if __name__ == "__main__":
81
  import uvicorn
 
1
+ from fastapi import FastAPI, HTTPException, Form
 
2
  import httpx
3
  import os
4
  from dotenv import load_dotenv
 
8
 
9
  app = FastAPI()
10
 
11
+ # Bearer token and base URL for external API (from environment variables)
12
  BEARER_TOKEN = os.getenv("MEDUCINE_API_BEARER_TOKEN")
 
 
13
  BASE_URL = os.getenv("BASE_URL")
14
 
15
+ @app.post("/actions/meducine-restapi/auth/login")
 
 
 
16
  async def login(email: str = Form(...), password: str = Form(...)):
17
+ """
18
+ Handles login by sending a request to the external API with the static Bearer token.
19
+ Even though it simulates a login, it uses the static Bearer token for authentication.
20
+ """
21
+ try:
22
+ # Send login request (simulates login but uses static Bearer token)
23
+ response = await make_request(
24
+ url=f"{BASE_URL}/actions/meducine-restapi/auth/login",
 
 
 
 
 
 
 
 
 
 
 
25
  data={"email": email, "password": password},
26
+ method="POST"
27
  )
28
+ return response
29
+ except Exception as e:
30
+ raise HTTPException(status_code=500, detail=f"Login failed: {str(e)}")
31
 
32
+
33
+ @app.post("/actions/meducine-restapi/auth/logout")
34
+ async def logout(email: str = Form(...)):
35
+ """
36
+ Handles logout using the static Bearer token.
37
+ """
38
+ try:
39
+ # Simulates logging out but uses the static Bearer token
40
+ response = await make_request(
41
+ url=f"{BASE_URL}/actions/meducine-restapi/auth/logout",
42
+ data={"email": email},
43
+ method="POST"
44
  )
45
+ return {"message": "Logout successful", "response": response}
46
+ except Exception as e:
47
+ raise HTTPException(status_code=500, detail=f"Logout failed: {str(e)}")
48
 
49
+
50
+ @app.post("/meducine-restapi/user/has-premium-access")
51
+ async def check_premium_access(feature: str, email: str = Form(...), password: str = Form(...)):
52
+ """
53
+ Checks if the user has premium access to a feature, using the static Bearer token for authentication.
54
+ """
55
+ try:
56
+ response = await make_request(
57
+ url=f"{BASE_URL}/actions/meducine-restapi/user/has-premium-access",
58
+ data={"email": email, "password": password},
59
  params={"feature": feature},
60
+ method="POST"
61
  )
62
+ return response
63
+ except Exception as e:
64
+ raise HTTPException(status_code=500, detail=f"Premium access check failed: {str(e)}")
65
+
66
+
67
+ async def make_request(url: str, data: dict = None, method: str = "GET", params: dict = None):
68
+ """
69
+ Helper function to make an HTTP request to the external API with the static Bearer token.
70
+ """
71
+ headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
72
+
73
+ async with httpx.AsyncClient() as client:
74
+ try:
75
+ if method == "POST":
76
+ response = await client.post(url, data=data, params=params, headers=headers)
77
+ elif method == "GET":
78
+ response = await client.get(url, params=params, headers=headers)
79
+ else:
80
+ raise HTTPException(status_code=405, detail="Method not allowed")
81
+
82
+ response.raise_for_status() # Raise exception for 4xx or 5xx errors
83
+ return handle_response(response)
84
+ except httpx.HTTPStatusError as e:
85
+ raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
86
+ except Exception as e:
87
+ raise HTTPException(status_code=500, detail=str(e))
88
+
89
 
90
  def handle_response(response: httpx.Response):
91
  """
92
+ Handles the API response, returning JSON data or raising exceptions based on status codes.
93
  """
94
  if response.status_code in range(200, 300):
95
  return response.json() # Successful request
 
100
  else:
101
  raise HTTPException(status_code=500, detail="Unexpected error")
102
 
103
+
104
+
105
  # Run the application
106
  if __name__ == "__main__":
107
  import uvicorn
api/router/topic.py DELETED
@@ -1,69 +0,0 @@
1
- from fastapi import Form, APIRouter, File, UploadFile, HTTPException, Request
2
- from db.repository import get_db_conn
3
- from db.get_data import GetDatabase
4
- from db.save_data import InsertDatabase
5
- from config import MYSQL_CONFIG
6
- from api.function import data_ingestion, get_data, delete_data, update_data
7
- from script.vector_db import IndexManager
8
- from service.dto import MetadataRequest
9
-
10
- router = APIRouter(tags=["Topics"])
11
-
12
- db_conn = get_db_conn(MYSQL_CONFIG)
13
- get_database = GetDatabase(db_conn)
14
- index_manager = IndexManager()
15
-
16
-
17
- @router.post("/topic")
18
- async def upload_file(
19
- title: str = Form(...),
20
- author: str = Form(...),
21
- category: str = Form(...),
22
- year: int = Form(...),
23
- publisher: str = Form(...),
24
- file: UploadFile = File(...),
25
- # content_table: UploadFile = File(...)
26
- ):
27
-
28
- reference = {
29
- "title": title,
30
- "author": author,
31
- "category": category,
32
- "year": year,
33
- "publisher": publisher,
34
- }
35
-
36
- # response = await data_ingestion(db_conn, reference, file, content_table)
37
- response = await data_ingestion(db_conn, reference, file)
38
- return {"filename": file.filename, "response": response}
39
-
40
-
41
- @router.get("/topic")
42
- async def get_metadata():
43
- results = await get_data(db_conn)
44
- return results
45
-
46
-
47
- @router.put("/topic/{id}")
48
- async def update_metadata(id: int, reference: MetadataRequest):
49
- try :
50
- old_reference = await get_database.get_data_by_id(id)
51
- index_manager.update_vector_database(old_reference, reference)
52
-
53
- return await update_data(id, reference, db_conn)
54
- except Exception as e:
55
- raise HTTPException(status_code=500, detail="An error occurred while updating metadata")
56
-
57
-
58
- @router.delete("/topic/{id}")
59
- async def delete_metadata(id: int):
60
- try:
61
- old_reference = await get_database.get_data_by_id(id)
62
- index_manager.delete_vector_database(old_reference)
63
-
64
- return await delete_data(id, db_conn)
65
-
66
- except Exception as e:
67
- print(e)
68
- raise HTTPException(status_code=500, detail="An error occurred while delete metadata")
69
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/router/user.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  from datetime import timedelta
2
  from typing import Annotated
3
 
@@ -12,6 +15,12 @@ from db.database import get_db
12
  from api.auth import get_current_user, create_access_token
13
  from service.dto import CreateUserRequest, UserVerification, Token
14
 
 
 
 
 
 
 
15
 
16
  router = APIRouter(tags=["User"])
17
 
@@ -23,19 +32,82 @@ user_dependency = Annotated[dict, Depends(get_current_user)]
23
  ACCESS_TOKEN_EXPIRE_MINUTES = 43200
24
 
25
 
 
 
 
 
 
 
 
 
 
26
  @router.post("/login", response_model=Token)
27
  async def login_for_access_token(
28
  login_data: Annotated[OAuth2PasswordRequestForm, Depends()],
29
  db: Session = Depends(get_db),
30
  ):
31
- user = db.query(User).filter(User.username == login_data.username).first()
32
-
33
- if not user or not bcrypt_context.verify(login_data.password, user.password_hash):
34
- return JSONResponse(
35
- status_code=status.HTTP_401_UNAUTHORIZED,
36
- content="Incorrect username or password",
37
- headers={"WWW-Authenticate": "Bearer"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  try:
41
  access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
@@ -52,7 +124,7 @@ async def login_for_access_token(
52
 
53
  except Exception as e:
54
  print(e)
55
- return JSONResponse(status_code=500, content="An error occuring when login")
56
 
57
 
58
  @router.get("/login", response_model=dict)
@@ -90,7 +162,6 @@ async def get_all_users(user: user_dependency, db: Session = Depends(get_db)):
90
  ]
91
 
92
 
93
- @router.post("/register")
94
  async def register_user(db: db_dependency, create_user_request: CreateUserRequest):
95
  existing_user = (
96
  db.query(User).filter(User.email == create_user_request.email).first()
@@ -122,27 +193,27 @@ async def register_user(db: db_dependency, create_user_request: CreateUserReques
122
  )
123
 
124
 
125
- @router.post("/forgot_password")
126
- async def forget_password():
127
- pass
128
 
129
 
130
- @router.post("/change_password")
131
- async def change_password(
132
- user: user_dependency, db: db_dependency, user_verification: UserVerification
133
- ):
134
- if user is None:
135
- return JSONResponse(status_code=401, content="Authentication Failed")
136
- user_model = db.query(User).filter(User.id == user.get("id")).first()
137
 
138
- if not bcrypt_context.verify(
139
- user_verification.password, user_model.hashed_password
140
- ):
141
- return JSONResponse(status_code=401, content="Error on password change")
142
 
143
- user_model.hashed_password = bcrypt_context.hash(user_verification.new_password)
144
- db.add(user_model)
145
- db.commit()
146
- db.refresh(user_model)
147
 
148
- return {"message": "User's password successfully changed", "user_id": user_model.id}
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
  from datetime import timedelta
5
  from typing import Annotated
6
 
 
15
  from api.auth import get_current_user, create_access_token
16
  from service.dto import CreateUserRequest, UserVerification, Token
17
 
18
+ from collections import Counter
19
+ from time import time
20
+
21
+
22
+ load_dotenv()
23
+
24
 
25
  router = APIRouter(tags=["User"])
26
 
 
32
  ACCESS_TOKEN_EXPIRE_MINUTES = 43200
33
 
34
 
35
+ # Rate-limiting config
36
+ FAILED_ATTEMPT_LIMIT = 3
37
+ BLOCK_TIME_SECONDS = 300 # Block for 5 minutes
38
+
39
+ # In-memory tracking for failed attempts
40
+ failed_attempts = Counter()
41
+ blocked_users = {}
42
+
43
+
44
  @router.post("/login", response_model=Token)
45
  async def login_for_access_token(
46
  login_data: Annotated[OAuth2PasswordRequestForm, Depends()],
47
  db: Session = Depends(get_db),
48
  ):
49
+ username = login_data.username
50
+
51
+ # Check if user is blocked
52
+ if username in blocked_users:
53
+ block_until = blocked_users[username]
54
+ if time() < block_until:
55
+ return JSONResponse(
56
+ status_code=status.HTTP_403_FORBIDDEN,
57
+ content=f"Too many failed attempts. Try again after {int(block_until - time())} seconds.",
58
+ )
59
+
60
+ else:
61
+ # Unblock the user after the time period
62
+ del blocked_users[username]
63
+ del failed_attempts[username]
64
+
65
+ user = db.query(User).filter(User.username == username).first()
66
+
67
+ if not user:
68
+ # Automatically register the user
69
+ create_user_request = CreateUserRequest(
70
+ name=login_data.username,
71
+ username=login_data.username,
72
+ email=login_data.username,
73
+ password=os.getenv("USER_PASSWORD"), # Replace with a generated or temporary password
74
+ role_id=2,
75
+ )
76
+ registration_response = await register_user(db, create_user_request)
77
+
78
+ if isinstance(registration_response, JSONResponse):
79
+ return registration_response # Return error response if registration failed
80
+
81
+ # Retrieve the newly created user after successful registration
82
+ user = db.query(User).filter(User.username == username).first()
83
+
84
+ if not user:
85
+ return JSONResponse(
86
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
87
+ content="User registration failed unexpectedly."
88
+ )
89
+
90
+ correct_password = (
91
+ bcrypt_context.verify(os.getenv("USER_PASSWORD"), user.password_hash) or
92
+ bcrypt_context.verify(login_data.password, user.password_hash)
93
  )
94
+
95
+ if not correct_password :
96
+ failed_attempts[username] = failed_attempts.get(username, 0) + 1
97
+ if failed_attempts[username] >= FAILED_ATTEMPT_LIMIT:
98
+ blocked_users[username] = time() + BLOCK_TIME_SECONDS
99
+ failed_attempts.pop(username, None) # Reset after blocking
100
+ return JSONResponse(
101
+ status_code=status.HTTP_403_FORBIDDEN,
102
+ content="Too many failed attempts. You are temporarily blocked."
103
+ )
104
+
105
+ return JSONResponse(
106
+ status_code=status.HTTP_401_UNAUTHORIZED,
107
+ content="Invalid credentials."
108
+ )
109
+
110
+ failed_attempts.pop(username, None)
111
 
112
  try:
113
  access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
 
124
 
125
  except Exception as e:
126
  print(e)
127
+ return JSONResponse(status_code=500, content="An error occurred during login")
128
 
129
 
130
  @router.get("/login", response_model=dict)
 
162
  ]
163
 
164
 
 
165
  async def register_user(db: db_dependency, create_user_request: CreateUserRequest):
166
  existing_user = (
167
  db.query(User).filter(User.email == create_user_request.email).first()
 
193
  )
194
 
195
 
196
+ # @router.post("/forgot_password")
197
+ # async def forget_password():
198
+ # pass
199
 
200
 
201
+ # @router.post("/change_password")
202
+ # async def change_password(
203
+ # user: user_dependency, db: db_dependency, user_verification: UserVerification
204
+ # ):
205
+ # if user is None:
206
+ # return JSONResponse(status_code=401, content="Authentication Failed")
207
+ # user_model = db.query(User).filter(User.id == user.get("id")).first()
208
 
209
+ # if not bcrypt_context.verify(
210
+ # user_verification.password, user_model.hashed_password
211
+ # ):
212
+ # return JSONResponse(status_code=401, content="Error on password change")
213
 
214
+ # user_model.hashed_password = bcrypt_context.hash(user_verification.new_password)
215
+ # db.add(user_model)
216
+ # db.commit()
217
+ # db.refresh(user_model)
218
 
219
+ # return {"message": "User's password successfully changed", "user_id": user_model.id}
api/util/util.py DELETED
File without changes
app.py CHANGED
@@ -23,7 +23,7 @@ def create_instance() -> FastAPI:
23
  def add_middleware(app: FastAPI) -> FastAPI:
24
  app.add_middleware(
25
  CORSMiddleware,
26
- allow_origins=["https://chatbook-multimedika.vercel.app/","http://localhost:3000"],
27
  allow_credentials=True,
28
  allow_methods=["*"],
29
  allow_headers=["*"],
@@ -48,7 +48,7 @@ def register_routers(app: FastAPI) -> FastAPI:
48
  app.include_router(health.router)
49
 
50
  return app
51
- ""
52
 
53
  def init_app() -> FastAPI:
54
  app: FastAPI = pipe(
 
23
  def add_middleware(app: FastAPI) -> FastAPI:
24
  app.add_middleware(
25
  CORSMiddleware,
26
+ allow_origins=["*"],
27
  allow_credentials=True,
28
  allow_methods=["*"],
29
  allow_headers=["*"],
 
48
  app.include_router(health.router)
49
 
50
  return app
51
+
52
 
53
  def init_app() -> FastAPI:
54
  app: FastAPI = pipe(
config.py CHANGED
@@ -30,7 +30,7 @@ class PineconeConfig(BaseSettings):
30
  class GPTBotConfig(BaseSettings):
31
  temperature : float = 0.3
32
  model : str = "gpt-4o-mini"
33
- max_tokens : int = 512
34
  streaming : bool = False
35
  api_key : str = os.environ.get("OPENAI_API_KEY")
36
 
 
30
  class GPTBotConfig(BaseSettings):
31
  temperature : float = 0.3
32
  model : str = "gpt-4o-mini"
33
+ max_tokens : int = 4096
34
  streaming : bool = False
35
  api_key : str = os.environ.get("OPENAI_API_KEY")
36
 
controller/__init__.py DELETED
File without changes
controller/book_collection_controller.py DELETED
File without changes
controller/book_controller.py DELETED
File without changes
controller/bot_general_controller.py DELETED
File without changes
controller/bot_one_controller.py DELETED
File without changes
controller/bot_specific_controller.py DELETED
File without changes
controller/category_controller.py DELETED
File without changes
controller/user_controller.py DELETED
File without changes
core/book_enabler/__init__.py DELETED
File without changes
core/chat/bot_service.py CHANGED
@@ -69,7 +69,6 @@ class ChatCompletionService:
69
 
70
  if self.type_bot == "general":
71
  response = redesign_structure_message(response, metadata_collection)
72
- print(response)
73
 
74
  # Save the message to chat store
75
  self._store_message_in_chatstore(response, metadata_collection)
@@ -136,6 +135,30 @@ class ChatCompletionService:
136
  print("No sources available")
137
 
138
  return contents, metadata_collection, scores
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  def _attach_contents_to_metadata(self, contents, metadata_collection):
141
  for i in range(min(len(contents), len(metadata_collection))):
@@ -171,5 +194,4 @@ class ChatCompletionService:
171
 
172
  db = self.client["bot_database"] # Replace with your database name
173
  collection = db[self.session_id] # Replace with your collection name
174
- result = collection.insert_many(chat_history_json)
175
- print("Data inserted with record ids", result.inserted_ids)
 
69
 
70
  if self.type_bot == "general":
71
  response = redesign_structure_message(response, metadata_collection)
 
72
 
73
  # Save the message to chat store
74
  self._store_message_in_chatstore(response, metadata_collection)
 
135
  print("No sources available")
136
 
137
  return contents, metadata_collection, scores
138
+
139
+ # def _process_sources_images(self, sources, number_reference_sorted):
140
+ # contents, metadata_collection, scores = [], [], []
141
+ # if not number_reference_sorted:
142
+ # print("There are no references")
143
+ # return contents, metadata_collection, scores
144
+
145
+ # for number in range (len(sources)):
146
+ # number = int(number)
147
+ # if sources and len(sources) > 0:
148
+ # node = dict(sources[0])["raw_output"].source_nodes
149
+ # if 0 <= number - 1 < len(node):
150
+ # content = node[number - 1].node.get_text()
151
+ # contents.append(content)
152
+ # metadata = dict(node[number - 1].node.metadata)
153
+ # metadata_collection.append(metadata)
154
+ # score = node[number - 1].score
155
+ # scores.append(score)
156
+ # else:
157
+ # print(f"Invalid reference number: {number}")
158
+ # else:
159
+ # print("No sources available")
160
+
161
+ # return contents, metadata_collection, scores
162
 
163
  def _attach_contents_to_metadata(self, contents, metadata_collection):
164
  for i in range(min(len(contents), len(metadata_collection))):
 
194
 
195
  db = self.client["bot_database"] # Replace with your database name
196
  collection = db[self.session_id] # Replace with your collection name
197
+ collection.insert_many(chat_history_json)
 
core/chat/bot_service_multimodal.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import re
3
+ import os
4
+ import pytz
5
+
6
+ from typing import List
7
+ from datetime import datetime
8
+ from datetime import timedelta
9
+ from fastapi.responses import JSONResponse
10
+ from script.vector_db import IndexManager
11
+ from llama_index.core.llms import MessageRole
12
+
13
+ from core.chat.engine import Engine
14
+ from core.chat.chatstore import ChatStore
15
+ from core.parser import (
16
+ filter_metadata_by_pages,
17
+ extract_sorted_page_numbers
18
+
19
+ )
20
+
21
+ from service.dto import ChatMessage
22
+ from pymongo.mongo_client import MongoClient
23
+
24
+
25
class ChatCompletionService:
    """Generate a chat completion for one session using a multimodal engine.

    Orchestrates index loading, chat-engine construction, source/metadata
    extraction, chat-store bookkeeping (Redis via ChatStore) and — for
    "specific" bots — archiving of the exchange to MongoDB.
    """

    def __init__(
        self,
        session_id: str,
        user_request: str,
        titles: List = None,
        type_bot: str = "general",
    ):
        self.session_id = session_id
        self.user_request = user_request
        self.titles = titles
        self.type_bot = type_bot
        self.client = MongoClient(os.getenv("MONGO_URI"))
        self.engine = Engine()
        self.index_manager = IndexManager()
        self.chatstore = ChatStore()

    def generate_completion(self):
        """Run the full completion pipeline.

        Returns:
            (response_text, metadata_collection, scores) on success, or a
            ``JSONResponse`` with status 500 on any failure.
        """
        if not self._ping_mongo():
            return JSONResponse(
                status_code=500, content="Database Error: Unable to connect to MongoDB"
            )

        try:
            # Load and retrieve chat engine with appropriate index
            index = self.index_manager.load_existing_indexes()
            chat_engine = self._get_chat_engine(index)

            # Generate chat response
            response = chat_engine.chat(self.user_request)

            sources = response.source_nodes

            contents, metadata_collection, scores = self._process_sources_images(sources)

            response = str(response)

            # Add each source's text to its metadata entry.
            metadata_collection = self._attach_contents_to_metadata(
                contents, metadata_collection
            )

            # Keep only metadata whose pages the answer actually cites.
            page_sources = extract_sorted_page_numbers(response)
            metadata_collection = filter_metadata_by_pages(metadata_collection, page_sources)

            # Save the message to chat store
            self._store_message_in_chatstore(response, metadata_collection)

        except Exception as e:
            logging.error(f"An error occurred in generate text: {e}")
            return JSONResponse(
                status_code=500, content=f"An internal server error occurred: {e}"
            )

        try:
            # Only "specific" bots persist their history to MongoDB.
            if self.type_bot == "specific":
                self._save_chat_history_to_db(response, metadata_collection)

            return str(response), metadata_collection, scores

        except Exception as e:
            logging.error(f"An error occurred while saving chat history: {e}")
            return JSONResponse(
                status_code=500,
                content=f"An internal server error occurred while saving chat history: {e}",
            )

    def _ping_mongo(self):
        """Return True if MongoDB answers a ping, False otherwise."""
        try:
            self.client.admin.command("ping")
            print("Pinged your deployment. Successfully connected to MongoDB!")
            return True
        except Exception as e:
            logging.error(f"MongoDB connection failed: {e}")
            return False

    def _get_chat_engine(self, index):
        """Build the chat engine; "general" bots use no title filters."""
        if self.type_bot == "general":
            return self.engine.get_chat_engine(self.session_id, index)
        return self.engine.get_chat_engine(
            self.session_id, index, self.titles, self.type_bot
        )

    def _extract_sorted_references(self, response):
        """Return the distinct bracketed reference numbers found in *response*
        (e.g. "[3]" -> "3"), sorted numerically."""
        number_reference = list(set(re.findall(r"\[(\d+)\]", str(response))))
        # key=int so "10" sorts after "2"; a plain sort orders the strings
        # lexicographically ("1", "10", "2").
        return sorted(number_reference, key=int)

    def _process_sources_images(self, sources):
        """Collect text, metadata and score for every source node, in order.

        Returns:
            (contents, metadata_collection, scores) — three parallel lists,
            all empty when *sources* is empty.
        """
        contents, metadata_collection, scores = [], [], []

        if not sources:
            print("No sources available")
            return contents, metadata_collection, scores

        # Iterate the nodes directly; indexing with `sources[number - 1]`
        # while looping over range(len(sources)) started at sources[-1].
        for source in sources:
            contents.append(source.node.get_text())
            metadata_collection.append(dict(source.node.metadata))
            scores.append(source.score)

        return contents, metadata_collection, scores

    def _attach_contents_to_metadata(self, contents, metadata_collection):
        """Copy each source's text into its metadata under the "content" key."""
        for i in range(min(len(contents), len(metadata_collection))):
            metadata_collection[i]["content"] = contents[i]
        return metadata_collection

    def _store_message_in_chatstore(self, response, metadata_collection):
        """Replace the last stored message with the assistant's final answer."""
        message = ChatMessage(
            role=MessageRole.ASSISTANT,
            content=response,
            metadata=metadata_collection,
        )
        self.chatstore.delete_last_message(self.session_id)
        self.chatstore.add_message(self.session_id, message)
        self.chatstore.clean_message(self.session_id)

    def _save_chat_history_to_db(self, response, metadata_collection):
        """Archive the user/assistant exchange to MongoDB (Jakarta timestamps)."""
        jakarta_tz = pytz.timezone("Asia/Jakarta")
        time_now = datetime.now(jakarta_tz)
        # Back-date the user message slightly so it sorts before the reply.
        user_timestamp = time_now - timedelta(seconds=0.2)
        chat_history_db = [
            ChatMessage(
                role=MessageRole.USER,
                content=self.user_request,
                timestamp=user_timestamp,
                payment="free" if self.type_bot == "general" else None,
            ),
            ChatMessage(
                role=MessageRole.ASSISTANT,
                content=response,
                metadata=metadata_collection,
                timestamp=time_now,
                payment="free" if self.type_bot == "general" else None,
            ),
        ]

        chat_history_json = [message.model_dump() for message in chat_history_db]

        db = self.client["bot_database"]  # Replace with your database name
        collection = db[self.session_id]  # Replace with your collection name
        collection.insert_many(chat_history_json)
core/chat/chatstore.py CHANGED
@@ -1,6 +1,10 @@
1
  import redis
2
  import os
3
  import json
 
 
 
 
4
  from fastapi.responses import JSONResponse
5
  from typing import Optional, List, Dict
6
  from llama_index.storage.chat_store.redis import RedisChatStore
@@ -8,44 +12,50 @@ from pymongo.mongo_client import MongoClient
8
  from llama_index.core.memory import ChatMemoryBuffer
9
  from service.dto import ChatMessage
10
 
 
 
11
 
12
  class ChatStore:
13
  def __init__(self):
14
  self.redis_client = redis.Redis(
15
- host="redis-10365.c244.us-east-1-2.ec2.redns.redis-cloud.com",
16
- port=10365,
17
- password=os.environ.get("REDIS_PASSWORD"),
 
 
18
  )
19
 
20
  uri = os.getenv("MONGO_URI")
21
  self.client = MongoClient(uri)
22
 
23
  def initialize_memory_bot(self, session_id):
24
-
 
 
25
  chat_store = RedisChatStore(
26
  redis_client=self.redis_client, ttl=86400 # Time-to-live set for 1 hour
27
  )
28
-
29
  db = self.client["bot_database"]
30
-
31
- if (
32
- self.redis_client.exists(session_id)
33
- or session_id in db.list_collection_names()
34
- ):
35
- if session_id not in self.redis_client.keys():
36
- self.add_chat_history_to_redis(
37
- session_id
38
- ) # Add chat history to Redis if not found
39
- # Create memory buffer with chat store and session key
 
40
  memory = ChatMemoryBuffer.from_defaults(
41
  token_limit=3000, chat_store=chat_store, chat_store_key=session_id
42
  )
43
  else:
44
- # Handle the case where the session doesn't exist
45
  memory = ChatMemoryBuffer.from_defaults(
46
  token_limit=3000, chat_store=chat_store, chat_store_key=session_id
47
  )
48
-
49
  return memory
50
 
51
  def get_messages(self, session_id: str) -> List[dict]:
@@ -56,14 +66,14 @@ class ChatStore:
56
 
57
  # Decode and parse each item into a dictionary
58
  return [json.loads(m.decode("utf-8")) for m in items]
59
-
60
  def get_last_message(self, session_id: str) -> Optional[Dict]:
61
  """Get the last message for a session_id."""
62
  last_message = self.redis_client.lindex(session_id, -1)
63
-
64
  if last_message is None:
65
  return None # Return None if there are no messages
66
-
67
  # Decode and parse the last message into a dictionary
68
  return json.loads(last_message.decode("utf-8"))
69
 
@@ -73,11 +83,13 @@ class ChatStore:
73
 
74
  # Get the last document by sorting by _id in descending order
75
  last_document = collection.find().sort("_id", -1).limit(1)
76
-
 
77
  for doc in last_document:
78
- doc["content"]
79
-
80
- return str(doc["content"])
 
81
 
82
  def delete_last_message(self, session_id: str) -> Optional[ChatMessage]:
83
  """Delete last message for a session_id."""
@@ -113,21 +125,23 @@ class ChatStore:
113
  def get_keys(self) -> List[str]:
114
  """Get all keys."""
115
  try:
116
- print(self.redis_client.keys("*"))
117
  return [key.decode("utf-8") for key in self.redis_client.keys("*")]
118
 
119
  except Exception as e:
120
- # Log the error and return JSONResponse for FastAPI
121
- print(f"An error occurred in update data.: {e}")
122
  return JSONResponse(status_code=400, content="the error when get keys")
123
 
124
- def add_message(self, session_id: str, message: ChatMessage) -> None:
125
  """Add a message for a session_id."""
126
  item = json.dumps(self._message_to_dict(message))
127
  self.redis_client.rpush(session_id, item)
128
 
129
- def _message_to_dict(self, message: ChatMessage) -> dict:
130
- return message.model_dump()
 
 
 
 
 
131
 
132
  def add_chat_history_to_redis(self, session_id: str) -> None:
133
  """Fetch chat history from MongoDB and add it to Redis."""
@@ -169,18 +183,11 @@ class ChatStore:
169
 
170
  # Convert the cursor to a list and exclude the _id field
171
  documents_list = [
172
- {key: doc[key] for key in doc if key != "_id" and doc[key] is not None}
173
  for doc in documents
174
  ]
175
 
176
- # Print the list of documents without the _id field
177
- print(documents_list) # Optional: If you want to see the output
178
-
179
  return documents_list
180
-
181
  except Exception as e:
182
- print(f"An error occurred while retrieving messages: {e}")
183
- return JSONResponse(
184
- status_code=500,
185
- content=f"An error occurred while retrieving messages: {e}",
186
- )
 
1
  import redis
2
  import os
3
  import json
4
+
5
+ from datetime import datetime
6
+ from dotenv import load_dotenv
7
+
8
  from fastapi.responses import JSONResponse
9
  from typing import Optional, List, Dict
10
  from llama_index.storage.chat_store.redis import RedisChatStore
 
12
  from llama_index.core.memory import ChatMemoryBuffer
13
  from service.dto import ChatMessage
14
 
15
+ load_dotenv()
16
+
17
 
18
  class ChatStore:
19
  def __init__(self):
20
  self.redis_client = redis.Redis(
21
+ # host="redis-10365.c244.us-east-1-2.ec2.redns.redis-cloud.com",
22
+ host = os.getenv("REDIS_HOST"),
23
+ port=os.getenv("REDIS_PORT"),
24
+ username = os.getenv("REDIS_USERNAME"),
25
+ password=os.getenv("REDIS_PASSWORD"),
26
  )
27
 
28
  uri = os.getenv("MONGO_URI")
29
  self.client = MongoClient(uri)
30
 
31
  def initialize_memory_bot(self, session_id):
32
+ # Decode Redis keys to work with strings
33
+ redis_keys = [key.decode('utf-8') for key in self.redis_client.keys()]
34
+
35
  chat_store = RedisChatStore(
36
  redis_client=self.redis_client, ttl=86400 # Time-to-live set for 1 hour
37
  )
 
38
  db = self.client["bot_database"]
39
+
40
+ # Check if the session exists in Redis or MongoDB
41
+ if session_id in redis_keys:
42
+ # If the session already exists in Redis, create the memory buffer using Redis
43
+ memory = ChatMemoryBuffer.from_defaults(
44
+ token_limit=3000, chat_store=chat_store, chat_store_key=session_id
45
+ )
46
+ elif session_id in db.list_collection_names():
47
+ # If the session exists in MongoDB but not Redis, fetch messages from MongoDB
48
+ self.add_chat_history_to_redis(session_id) # Add chat history to Redis
49
+ # Then create the memory buffer using Redis
50
  memory = ChatMemoryBuffer.from_defaults(
51
  token_limit=3000, chat_store=chat_store, chat_store_key=session_id
52
  )
53
  else:
54
+ # If the session doesn't exist in either Redis or MongoDB, create an empty memory buffer
55
  memory = ChatMemoryBuffer.from_defaults(
56
  token_limit=3000, chat_store=chat_store, chat_store_key=session_id
57
  )
58
+
59
  return memory
60
 
61
  def get_messages(self, session_id: str) -> List[dict]:
 
66
 
67
  # Decode and parse each item into a dictionary
68
  return [json.loads(m.decode("utf-8")) for m in items]
69
+
70
  def get_last_message(self, session_id: str) -> Optional[Dict]:
71
  """Get the last message for a session_id."""
72
  last_message = self.redis_client.lindex(session_id, -1)
73
+
74
  if last_message is None:
75
  return None # Return None if there are no messages
76
+
77
  # Decode and parse the last message into a dictionary
78
  return json.loads(last_message.decode("utf-8"))
79
 
 
83
 
84
  # Get the last document by sorting by _id in descending order
85
  last_document = collection.find().sort("_id", -1).limit(1)
86
+
87
+ # Iterasi last_document dan kembalikan isi content jika ada
88
  for doc in last_document:
89
+ return str(doc.get('content', "")) # kembalikan content atau string kosong jika tidak ada
90
+
91
+ # Jika tidak ada dokumen, kembalikan string kosong
92
+ return ""
93
 
94
  def delete_last_message(self, session_id: str) -> Optional[ChatMessage]:
95
  """Delete last message for a session_id."""
 
125
  def get_keys(self) -> List[str]:
126
  """Get all keys."""
127
  try:
 
128
  return [key.decode("utf-8") for key in self.redis_client.keys("*")]
129
 
130
  except Exception as e:
 
 
131
  return JSONResponse(status_code=400, content="the error when get keys")
132
 
133
+ def add_message(self, session_id: str, message: Optional[ChatMessage]) -> None:
134
  """Add a message for a session_id."""
135
  item = json.dumps(self._message_to_dict(message))
136
  self.redis_client.rpush(session_id, item)
137
 
138
+ def _message_to_dict(self, message: Optional[ChatMessage]) -> dict:
139
+ # Convert the ChatMessage instance into a dictionary with necessary adjustments
140
+ message_dict = message.model_dump()
141
+ # Convert any datetime fields to ISO format, if needed
142
+ if isinstance(message_dict.get('timestamp'), datetime):
143
+ message_dict['timestamp'] = message_dict['timestamp'].isoformat()
144
+ return message_dict
145
 
146
  def add_chat_history_to_redis(self, session_id: str) -> None:
147
  """Fetch chat history from MongoDB and add it to Redis."""
 
183
 
184
  # Convert the cursor to a list and exclude the _id field
185
  documents_list = [
186
+ {key: doc[key] for key in doc if key !="_id" and doc[key] is not None}
187
  for doc in documents
188
  ]
189
 
 
 
 
190
  return documents_list
191
+
192
  except Exception as e:
193
+ return JSONResponse(status_code=500, content=f"An error occurred while retrieving messages: {e}")
 
 
 
 
core/chat/engine.py CHANGED
@@ -8,11 +8,14 @@ from llama_index.core.tools import QueryEngineTool, ToolMetadata
8
  from llama_index.agent.openai import OpenAIAgent
9
  from llama_index.llms.openai import OpenAI
10
  from llama_index.core.query_engine import CitationQueryEngine
 
 
11
  from llama_index.core import Settings
12
  from core.chat.chatstore import ChatStore
 
13
 
14
  from config import GPTBOT_CONFIG
15
- from core.prompt import SYSTEM_BOT_TEMPLATE, ADDITIONAL_INFORMATIONS
16
  from core.parser import join_list
17
 
18
 
@@ -27,8 +30,11 @@ class Engine:
27
 
28
  self.chat_store = ChatStore()
29
  Settings.llm = self.llm
 
 
30
 
31
  def get_citation_engine(self, titles:List, index):
 
32
  filters = [
33
  MetadataFilter(
34
  key="title",
@@ -41,41 +47,66 @@ class Engine:
41
  filters = MetadataFilters(filters=filters, condition="or")
42
 
43
  # Create the QueryEngineTool with the index and filters
44
- kwargs = {"similarity_top_k": 5, "filters": filters}
45
 
46
  retriever = index.as_retriever(**kwargs)
47
 
48
  # citation_engine = CitationQueryEngine(retriever=retriever)
49
 
50
- return CitationQueryEngine.from_args(index, retriever=retriever)
 
 
51
 
52
  def get_chat_engine(self, session_id, index, titles=None, type_bot="general"):
53
  # Create the QueryEngineTool based on the type
54
  if type_bot == "general":
55
  # query_engine = index.as_query_engine(similarity_top_k=3)
56
- citation_engine = CitationQueryEngine.from_args(index, similarity_top_k=5)
57
- description = "A book containing information about medicine"
 
 
 
58
  else:
59
  citation_engine = self.get_citation_engine(titles, index)
60
- description = "A book containing information about medicine"
61
 
62
- metadata = ToolMetadata(name="bot-belajar", description=description)
63
- print(metadata)
64
 
65
- vector_query_engine = QueryEngineTool(
66
- query_engine=citation_engine, metadata=metadata
 
 
 
 
 
 
 
 
67
  )
68
- print(vector_query_engine)
69
 
70
  # Initialize the OpenAI agent with the tools
71
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  if type_bot == "general":
73
- system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information="")
74
  else:
75
  additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
76
- system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information=additional_information)
 
77
  chat_engine = OpenAIAgent.from_tools(
78
- tools=[vector_query_engine],
79
  llm=self.llm,
80
  memory=self.chat_store.initialize_memory_bot(session_id),
81
  system_prompt=system_prompt,
 
8
  from llama_index.agent.openai import OpenAIAgent
9
  from llama_index.llms.openai import OpenAI
10
  from llama_index.core.query_engine import CitationQueryEngine
11
+ from llama_index.embeddings.openai import OpenAIEmbedding
12
+ from llama_index.multi_modal_llms.openai import OpenAIMultiModal
13
  from llama_index.core import Settings
14
  from core.chat.chatstore import ChatStore
15
+ from core.multimodal import MultimodalQueryEngine
16
 
17
  from config import GPTBOT_CONFIG
18
+ from core.prompt import SYSTEM_BOT_TEMPLATE, ADDITIONAL_INFORMATIONS,SYSTEM_BOT_GENERAL_TEMPLATE, SYSTEM_BOT_IMAGE_TEMPLATE
19
  from core.parser import join_list
20
 
21
 
 
30
 
31
  self.chat_store = ChatStore()
32
  Settings.llm = self.llm
33
+ embed_model = OpenAIEmbedding(model="text-embedding-3-large")
34
+ Settings.embed_model = embed_model
35
 
36
  def get_citation_engine(self, titles:List, index):
37
+ model_multimodal = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
38
  filters = [
39
  MetadataFilter(
40
  key="title",
 
47
  filters = MetadataFilters(filters=filters, condition="or")
48
 
49
  # Create the QueryEngineTool with the index and filters
50
+ kwargs = {"similarity_top_k": 10, "filters": filters}
51
 
52
  retriever = index.as_retriever(**kwargs)
53
 
54
  # citation_engine = CitationQueryEngine(retriever=retriever)
55
 
56
+ # return CitationQueryEngine.from_args(index, retriever=retriever)
57
+ return MultimodalQueryEngine(retriever=retriever, multi_modal_llm=model_multimodal)
58
+
59
 
60
  def get_chat_engine(self, session_id, index, titles=None, type_bot="general"):
61
  # Create the QueryEngineTool based on the type
62
  if type_bot == "general":
63
  # query_engine = index.as_query_engine(similarity_top_k=3)
64
+ # citation_engine = CitationQueryEngine.from_args(index, similarity_top_k=5)
65
+ model_multimodal = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
66
+ retriever = index.as_retriever(similarity_top_k=10)
67
+ citation_engine = MultimodalQueryEngine(retriever=retriever, multi_modal_llm=model_multimodal)
68
+ # description = "A book containing information about medicine"
69
  else:
70
  citation_engine = self.get_citation_engine(titles, index)
71
+ # description = "A book containing information about medicine"
72
 
73
+ # metadata = ToolMetadata(name="bot-belajar", description=description)
 
74
 
75
+ # vector_query_engine = QueryEngineTool(
76
+ # query_engine=citation_engine, metadata=metadata
77
+ # )
78
+
79
+ vector_tool = QueryEngineTool.from_defaults(
80
+ query_engine=citation_engine,
81
+ name="vector_tool",
82
+ description=(
83
+ "Useful for retrieving specific context from the data from a book containing information about medicine"
84
+ ),
85
  )
86
+
87
 
88
  # Initialize the OpenAI agent with the tools
89
 
90
+ # if type_bot == "general":
91
+ # system_prompt = SYSTEM_BOT_GENERAL_TEMPLATE
92
+ # else:
93
+ # additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
94
+ # system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information=additional_information)
95
+ # chat_engine = OpenAIAgent.from_tools(
96
+ # tools=[vector_query_engine],
97
+ # llm=self.llm,
98
+ # memory=self.chat_store.initialize_memory_bot(session_id),
99
+ # system_prompt=system_prompt,
100
+ # )
101
+
102
  if type_bot == "general":
103
+ system_prompt = SYSTEM_BOT_IMAGE_TEMPLATE
104
  else:
105
  additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
106
+ system_prompt = SYSTEM_BOT_IMAGE_TEMPLATE.format(additional_information=additional_information)
107
+
108
  chat_engine = OpenAIAgent.from_tools(
109
+ tools=[vector_tool],
110
  llm=self.llm,
111
  memory=self.chat_store.initialize_memory_bot(session_id),
112
  system_prompt=system_prompt,
core/chat/messaging.py DELETED
@@ -1,63 +0,0 @@
1
- # Experimental
2
-
3
- from typing import Dict, Any, Optional, List
4
- import asyncio
5
- import logging
6
- from uuid import uuid4
7
- from anyio import ClosedResourceError
8
- from anyio.streams.memory import MemoryObjectSendStream
9
-
10
- from llama_index.core.callbacks.base import BaseCallbackHandler, CallbackManager
11
- from llama_index.core.callbacks import CBEventType, EventPayload
12
- from llama_index.core.query_engine.sub_question_query_engine import (
13
- SubQuestionAnswerPair,
14
- )
15
- from llama_index.core.chat_engine.types import StreamingAgentChatResponse
16
- from pydantic import BaseModel
17
-
18
- from core.chat import schema
19
-
20
- from db.db import MessageSubProcessSourceEnum
21
- from core.chat.schema import SubProcessMetadataKeysEnum, SubProcessMetadataMap
22
- from core.chat.engine import Engine
23
- from script.vector_db import IndexManager
24
- from service.dto import UserPromptRequest
25
-
26
- logger = logging.getLogger(__name__)
27
-
28
- class StreamedMessage(BaseModel):
29
- content: str
30
-
31
- async def handle_chat_message(
32
- user_message: str,
33
- send_chan: MemoryObjectSendStream,
34
- ) -> None:
35
- async with send_chan:
36
- engine = Engine()
37
-
38
- index_manager = IndexManager()
39
- index = index_manager.load_existing_indexes()
40
-
41
- # Retrieve the chat engine with the loaded index
42
- chat_engine = await engine.get_chat_engine(index)
43
-
44
- logger.debug("Engine received")
45
- streaming_chat_response: StreamingAgentChatResponse = (
46
- await chat_engine.astream_chat(user_message)
47
- )
48
- response_str = ""
49
- async for text in streaming_chat_response.async_response_gen():
50
- response_str += text
51
- if send_chan._closed:
52
- logger.debug(
53
- "Received streamed token after send channel closed. Ignoring."
54
- )
55
- return
56
- await send_chan.send(StreamedMessage(content=response_str))
57
-
58
- if response_str.strip() == "":
59
- await send_chan.send(
60
- StreamedMessage(
61
- content="Sorry, I either wasn't able to understand your question or I don't have an answer for it."
62
- )
63
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
core/chat/schema.py DELETED
@@ -1,162 +0,0 @@
1
- # Experimental
2
-
3
- from pydantic import BaseModel, Field, field_validator
4
- from typing import List, Optional, Dict, Union, Any
5
- from enum import Enum
6
- from uuid import UUID
7
- from datetime import datetime
8
- from llama_index.core.schema import BaseNode, NodeWithScore
9
- from llama_index.core.callbacks.schema import EventPayload
10
- from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair
11
- from db.db import (
12
- MessageRoleEnum,
13
- MessageStatusEnum,
14
- MessageSubProcessSourceEnum,
15
- MessageSubProcessStatusEnum,
16
- )
17
-
18
- DB_DOC_ID_KEY = "db_document_id"
19
-
20
- class Base(BaseModel):
21
- id: Optional[UUID] = Field(None, description="Unique identifier")
22
- created_at: Optional[datetime] = Field(None, description="Creation datetime")
23
- updated_at: Optional[datetime] = Field(None, description="Update datetime")
24
-
25
- class Config:
26
- orm_mode = True
27
-
28
- class BaseMetadataObject(BaseModel):
29
- class Config:
30
- orm_mode = True
31
-
32
- class Citation(BaseMetadataObject):
33
- document_id: UUID
34
- text: str
35
- page_number: int
36
- score: Optional[float]
37
-
38
- @field_validator("document_id")
39
- def validate_document_id(cls, value):
40
- if value:
41
- return str(value)
42
- return value
43
-
44
- @classmethod
45
- def from_node(cls, node_w_score: NodeWithScore) -> "Citation":
46
- node: BaseNode = node_w_score.node
47
- page_number = int(node.source_node.metadata["page_label"])
48
- document_id = node.source_node.metadata[""]
49
- return cls(
50
- document_id=document_id,
51
- text=node.get_content(),
52
- page_number=page_number,
53
- score=node_w_score.score,
54
- )
55
-
56
-
57
- class QuestionAnswerPair(BaseMetadataObject):
58
- """
59
- A question-answer pair that is used to store the sub-questions and answers
60
- """
61
-
62
- question: str
63
- answer: Optional[str]
64
- citations: Optional[List[Citation]] = None
65
-
66
- @classmethod
67
- def from_sub_question_answer_pair(
68
- cls, sub_question_answer_pair: SubQuestionAnswerPair
69
- ):
70
- if sub_question_answer_pair.sources is None:
71
- citations = None
72
- else:
73
- citations = [
74
- Citation.from_node(node_w_score)
75
- for node_w_score in sub_question_answer_pair.sources
76
- if node_w_score.node.source_node is not None
77
- and DB_DOC_ID_KEY in node_w_score.node.source_node.metadata
78
- ]
79
- citations = citations or None
80
- return cls(
81
- question=sub_question_answer_pair.sub_q.sub_question,
82
- answer=sub_question_answer_pair.answer,
83
- citations=citations,
84
- )
85
-
86
-
87
- # later will be Union[QuestionAnswerPair, more to add later... ]
88
- class SubProcessMetadataKeysEnum(str, Enum):
89
- SUB_QUESTION = EventPayload.SUB_QUESTION.value
90
-
91
-
92
- # keeping the typing pretty loose here, in case there are changes to the metadata data formats.
93
- SubProcessMetadataMap = Dict[Union[SubProcessMetadataKeysEnum, str], Any]
94
-
95
-
96
- class MessageSubProcess(Base):
97
- message_id: UUID
98
- source: MessageSubProcessSourceEnum
99
- status: MessageSubProcessStatusEnum
100
- metadata_map: Optional[SubProcessMetadataMap]
101
-
102
-
103
- class Message(Base):
104
- conversation_id: UUID
105
- content: str
106
- role: MessageRoleEnum
107
- status: MessageStatusEnum
108
- sub_processes: List[MessageSubProcess]
109
-
110
-
111
- class UserMessageCreate(BaseModel):
112
- content: str
113
-
114
- class DocumentMetadataKeysEnum(str, Enum):
115
- """
116
- Enum for the keys of the metadata map for a document
117
- """
118
-
119
- SEC_DOCUMENT = "sec_document"
120
-
121
-
122
- class SecDocumentTypeEnum(str, Enum):
123
- """
124
- Enum for the type of sec document
125
- """
126
-
127
- TEN_K = "10-K"
128
- TEN_Q = "10-Q"
129
-
130
-
131
- class SecDocumentMetadata(BaseModel):
132
- """
133
- Metadata for a document that is a sec document
134
- """
135
-
136
- company_name: str
137
- company_ticker: str
138
- doc_type: SecDocumentTypeEnum
139
- year: int
140
- quarter: Optional[int]
141
- accession_number: Optional[str]
142
- cik: Optional[str]
143
- period_of_report_date: Optional[datetime]
144
- filed_as_of_date: Optional[datetime]
145
- date_as_of_change: Optional[datetime]
146
-
147
-
148
- DocumentMetadataMap = Dict[Union[DocumentMetadataKeysEnum, str], Any]
149
-
150
-
151
- class Document(Base):
152
- url: str
153
- metadata_map: Optional[DocumentMetadataMap] = None
154
-
155
-
156
- class Conversation(Base):
157
- messages: List[Message]
158
- documents: List[Document]
159
-
160
-
161
- class ConversationCreate(BaseModel):
162
- document_ids: List[UUID]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
core/module_creator/__init__.py DELETED
File without changes
core/multimodal.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_index.core.query_engine import CustomQueryEngine
2
+ from llama_index.core.retrievers import BaseRetriever
3
+ from llama_index.multi_modal_llms.openai import OpenAIMultiModal
4
+ from llama_index.core.schema import ImageNode, NodeWithScore, MetadataMode
5
+ from llama_index.core.prompts import PromptTemplate
6
+ from llama_index.core.base.response.schema import Response
7
+ from typing import Optional
8
+ from core.prompt import MULTOMODAL_QUERY_TEMPLATE
9
+
10
+
11
+ gpt_4o = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
12
+
13
+
14
+ QA_PROMPT = PromptTemplate(MULTOMODAL_QUERY_TEMPLATE)
15
+
16
+
17
+ class MultimodalQueryEngine(CustomQueryEngine):
18
+ """Custom multimodal Query Engine.
19
+
20
+ Takes in a retriever to retrieve a set of document nodes.
21
+ Also takes in a prompt template and multimodal model.
22
+
23
+ """
24
+
25
+ qa_prompt: PromptTemplate
26
+ retriever: BaseRetriever
27
+ multi_modal_llm: OpenAIMultiModal
28
+
29
+ def __init__(self, qa_prompt: Optional[PromptTemplate] = None, **kwargs) -> None:
30
+ """Initialize."""
31
+ super().__init__(qa_prompt=qa_prompt or QA_PROMPT, **kwargs)
32
+
33
+ def custom_query(self, query_str: str):
34
+ # retrieve text nodes
35
+ nodes = self.retriever.retrieve(query_str)
36
+ # create ImageNode items from text nodes
37
+
38
+ image_nodes = [
39
+ NodeWithScore(node=ImageNode(image_url=link))
40
+ for n in nodes
41
+ if "image_link" in n.metadata
42
+ and n.metadata["image_link"] not in ["", []]
43
+ for link in (n.metadata["image_link"] if isinstance(n.metadata["image_link"], list) else [n.metadata["image_link"]])
44
+ if link not in ["", []]
45
+ ]
46
+
47
+ print("image_nodes: {}".format(image_nodes))
48
+
49
+ # create context string from text nodes, dump into the prompt
50
+ context_str = "\n\n".join(
51
+ [r.get_content(metadata_mode=MetadataMode.LLM) for r in nodes]
52
+ )
53
+ fmt_prompt = self.qa_prompt.format(context_str=context_str, query_str=query_str)
54
+
55
+ # synthesize an answer from formatted text and images
56
+ llm_response = self.multi_modal_llm.complete(
57
+ prompt=fmt_prompt,
58
+ image_documents=[image_node.node for image_node in image_nodes],
59
+ )
60
+ return Response(
61
+ response=str(llm_response),
62
+ source_nodes=nodes,
63
+ metadata={"text_nodes": nodes, "image_nodes": image_nodes},
64
+ )
core/parser.py CHANGED
@@ -64,7 +64,6 @@ def update_response(text):
64
 
65
  return text
66
 
67
-
68
  def renumber_sources(source_list):
69
  new_sources = []
70
  for i, source in enumerate(source_list):
@@ -74,7 +73,6 @@ def renumber_sources(source_list):
74
  new_sources.append(f"source {i+1}: {content}")
75
  return new_sources
76
 
77
-
78
  def sort_and_renumber_sources(source_list):
79
  """
80
  This function takes a list of sources, sorts them based on the source number,
@@ -98,6 +96,7 @@ def sort_and_renumber_sources(source_list):
98
 
99
  return sorted_sources
100
 
 
101
  def seperate_to_list(text):
102
  # Step 1: Split the text by line breaks (\n)
103
  lines = text.split("\n")
@@ -123,7 +122,7 @@ def join_list(items):
123
  return f"{items[0]} and {items[1]}"
124
  else:
125
  return ", ".join(items[:-1]) + " and " + items[-1]
126
-
127
  def redesign_structure_message(message, metadata):
128
  """
129
  This function replaces occurrences of '[n]' in the message
@@ -143,4 +142,31 @@ def redesign_structure_message(message, metadata):
143
  # Use regex to find all citations in the format '[n]'
144
  redesigned_message = re.sub(r'\[(\d+)\]', replace_citation, message)
145
 
146
- return redesigned_message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  return text
66
 
 
67
  def renumber_sources(source_list):
68
  new_sources = []
69
  for i, source in enumerate(source_list):
 
73
  new_sources.append(f"source {i+1}: {content}")
74
  return new_sources
75
 
 
76
  def sort_and_renumber_sources(source_list):
77
  """
78
  This function takes a list of sources, sorts them based on the source number,
 
96
 
97
  return sorted_sources
98
 
99
+
100
  def seperate_to_list(text):
101
  # Step 1: Split the text by line breaks (\n)
102
  lines = text.split("\n")
 
122
  return f"{items[0]} and {items[1]}"
123
  else:
124
  return ", ".join(items[:-1]) + " and " + items[-1]
125
+
126
  def redesign_structure_message(message, metadata):
127
  """
128
  This function replaces occurrences of '[n]' in the message
 
142
  # Use regex to find all citations in the format '[n]'
143
  redesigned_message = re.sub(r'\[(\d+)\]', replace_citation, message)
144
 
145
+ return redesigned_message
146
+
147
+ def extract_sorted_page_numbers(content):
148
+ # Regular expression pattern to match page references like [p-166], [p-163], etc.
149
+ page_pattern = r'\[p-(\d+)\]'
150
+
151
+ # Find all matches (page numbers) in the content
152
+ page_numbers = re.findall(page_pattern, content)
153
+
154
+ # Convert the found page numbers into integers, remove duplicates, and sort them
155
+ return sorted(set(map(int, page_numbers))) # Use set to remove duplicates and sorted to sort them
156
+
157
+ # Method to filter and create a new list with the relevant page numbers [163, 165, 166]
158
+ def filter_metadata_by_pages(metadata, pages):
159
+ if pages and metadata:
160
+ combined_metadata = [{
161
+ "page_number": pages,
162
+ "title": metadata[0]["title"], # All entries share the same title
163
+ "author": metadata[0]["author"], # All entries share the same author
164
+ "category": metadata[0]["category"], # All entries share the same category
165
+ "year": metadata[0]["year"], # All entries share the same year
166
+ "publisher": metadata[0]["publisher"], # All entries share the same publisher
167
+ "reference": metadata[0]["reference"] # All entries share the same reference
168
+ }]
169
+
170
+ return combined_metadata
171
+ else:
172
+ return []
core/prompt.py CHANGED
@@ -1,5 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  SYSTEM_BOT_TEMPLATE = """
2
- Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu hanya memberikan informasi dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi dari apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli.
3
 
4
  **Instruksi**:
5
 
@@ -14,6 +47,121 @@ Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis
14
  5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
15
  """
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  ADDITIONAL_INFORMATIONS = """
18
  Kemudian, kamu menjawab pertanyan user dari buku {titles}, jadi jika user bertaya kamu pastikan akan mengacu buku tersebut yang didapatkan dari tools dari yang kamu punya.
19
  """
@@ -49,6 +197,7 @@ Kamu juga harus memperhatikan instruksi :
49
  - "Dapatkan buku ini sekarang dan tingkatkan pemahaman Anda tentang kesehatan 😊"
50
  """
51
 
 
52
  SYSTEM_TOPIC_TEMPLATE = """
53
  You are tasked with analyzing a table of contents from a book. Your goal is to identify and extract the main topics and subtopics. Please provide a clear and organized list of these topics and subtopics. The list should reflect the structure and hierarchy presented in the table of contents.
54
  """
@@ -147,7 +296,6 @@ Your task is to extract and organize metadata for the {class_name}. Follow the i
147
  - **How:** Structure the entries clearly and precisely as attributes of the class.
148
  - **Tip:** Use precise language to capture the relationship between the main topic and subtopic, ensuring clarity and ease of reference for future use.
149
  """
150
-
151
  SUMMARIZER_SYSTEM_TEMPLATE = """
152
 
153
  """
 
1
+ PARSER_INSTRUCTION = """
2
+ You are a highly proficient language model designed to convert pages from PDF, PPT and other files into structured markdown text. Your goal is to accurately transcribe text and identify and describe images, particularly graphs and other graphical elements.
3
+
4
+ You have been tasked with creating a markdown copy of each page from the provided PDF or PPT image. You should write the number of the figure, and keep it in your markdown text. Each image description must include a full description of the content, a summary of the graphical object.
5
+
6
+ Maintain the sequence of all the elements.
7
+
8
+ For the following element, follow the requirement of extraction:
9
+ for Text:
10
+ - Extract all readable text from the page.
11
+ - Exclude any diagonal text, headers, and footers.
12
+
13
+ for Text which includes hyperlink:
14
+ -Extract hyperlink and present it with the text
15
+
16
+ for Image Identification and Description:
17
+ - Identify all images, graphs, and other graphical elements on the page.
18
+ - For each image or graph, note the figure number and include it in the description as "Figure X" where X is the figure number.
19
+ - If the image has graph , extract the graph as image . DO NOT convert it into a table or extract the wording inside the graph.
20
+ - If image contains wording that is hard to extract , flag it with <unidentifiable section> instead of parsing.
21
+ - If the image has a subtitle or caption, include it in the description.
22
+ - If the image has a organisation chart , convert it into a hierachical understandable format.
23
+ - If the image contain process flow , capture it as a whole image instead of separate into blocks of images.
24
+
25
+ for Table:
26
+ - Try to retain the columns and structure of the table and extract it into markdown format.
27
+
28
+ # OUTPUT INSTRUCTIONS
29
+
30
+ - Exclude any diagonal text, headers, and footers from the output.
31
+ - For each image and graph, provide a detailed description,caption if there's any and summary. Clearly denote the figure number for each image in the format "Figure X" if it is noticed in the context.
32
+ """
33
+
34
  SYSTEM_BOT_TEMPLATE = """
35
+ Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu hanya memberikan informasi dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. Jika bertanya tentang rangkuman, cukup rangkum apa yang kamu tahu pada konteks yang kamu miliki. Jika bertanya tentang daftar isi, pastikan kamu melihatnya dari teks pendahuluan atau content tablenya. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi dari apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli.
36
 
37
  **Instruksi**:
38
 
 
47
  5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
48
  """
49
 
50
+ SYSTEM_BOT_IMAGE_TEMPLATE = """
51
+ Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu menggunakan tool, jangan mencoba coba untuk menerka jawaban dari pengetahuanmu kemudian pastikan kamu hanya memberikan informasi dari tool dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi secara singkat apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli. Selalu cantumkan citation halamannya dari konteks yang kamu ambil berdasarkan format yang ada. Yaitu menggunakan [p-no.halaman] di akhir kutipan yang kamu ambil.
52
+
53
+ **Instruksi**:
54
+
55
+ 1. **Jawaban Berdasarkan Tools**: Jika pengguna bertanya tentang topik kedokteran, gunakanlah tools yang tersedia untuk memberikan jawaban. Pastikan jawabanmu relevan dan sesuai dengan informasi dari tools tersebut. Jelaskan informasi dengan jelas dan lengkap. Jika ada tabel, boleh anda tampilkan tabel nya untuk menyampaikan data data yang jelas berdasarkan konteks buku.
56
+
57
+ 2. **Referensi dan Kutipan**:
58
+ - Jika di konteks terdapat gambar maka anda perlu menampilkan gambar dengan cantumkan link gambarnya yang ditampilkan di markdown**, serta tampilkan di halaman berapa gambar tersebut diambil contoh :
59
+ * Based on the images: <br>
60
+ ![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/test/img_p9_1.png) --> kamu tetap harus menampilkan tampilan gambar yang di markdown sehingga menjadi :
61
+
62
+ Pada gambar :\n
63
+ ![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/test/img_p9_1.png)
64
+ \nsumber : [p-10]
65
+
66
+ Jika memang tidak ada gambar pada konteks yang dibutuhkan oleh pertanyaan, anda cukup tambahkan bahwa anda tidak memiliki gambar pada konteks pertanyaan. Pastikan gambarnya sesuai dengan apa yang ditanyakan, tidak membuat gambar yang tidak sesuai dengan konteks.
67
+
68
+ - Jangan menghapus sumber kutipan baik berupa citasi atau halaman (page number) dari teks yang diberikan. Contohnya, jika teksnya adalah "Ilmu kedokteran sangat dibutuhkan [p-2]", pastikan untuk menyertakan kutipan sumbernya yaitu [p-2] dalam jawabanmu. Contoh lain: :
69
+
70
+ * Water is wet when the sky is red [p-11]. --> kamu harus tetap cantumkan [p-11] pada jawaban yang kamu generate
71
+
72
+ * source :
73
+ page_number : 12
74
+ The sky is red in the evening and blue in the morning. --> Kamu harus mengubahnya menjadi sesuai dengan page numbernya atau citationnya sehingga menjadi :
75
+ The sky is red in the evening and blue in the morning [p-12].
76
+
77
+ 3. **Ketika Tidak Tahu Jawaban**: Jika pertanyaan pengguna tidak dapat dijawab dengan menggunakan tools ini, sampaikan dengan sopan bahwa kamu tidak memiliki jawaban untuk pertanyaan tersebut. Arahkan pengguna untuk mencari informasi lebih lanjut atau bertanya pada ahli di bidang kedokteran.
78
+
79
+ 4. **Gaya Jawaban**: Berikan jawaban dengan gaya yang ramah dan profesional. Sampaikan informasi secara naratif agar lebih mudah dipahami. Boleh menggunakan point point dan uraiannya agar bisa menjelaskan informasi yang kompleks sehingga mudah dipahami. Gunakan kata 'dok' atau 'dokter' untuk merujuk pada dokter, dan hindari kesan monoton dengan menambahkan emotikon jika sesuai seperti 😁, 😊, 🙌, 😉, 😀, 🤔, 😇.
80
+
81
+ 5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
82
+ """
83
+
84
+ MULTOMODAL_QUERY_TEMPLATE = """\
85
+ Below is parsed text from books, available in two formats: 'markdown' (which organizes relevant diagrams as tables) and 'raw text' (preserving the rough spatial layout of the original text). Additionally, image references from the book are provided.
86
+
87
+ ### Instructions:
88
+ 1. **Use image information as the primary source**: Reference the **image URL** to explain your answer, if possible.
89
+ 2. **Only use parsed text** (markdown or raw) **if the image does not provide a clear answer**.
90
+ 3. **Always cite the page number** for any information referenced. Please give the page number after the text that you cited, the format is : [p-no.page]
91
+ 4. **Provide the image inline in the answer** by linking directly to the AWS S3 image URL provided for easy viewing.
92
+ 5. Ensure that the AWS link represented by [title] matches the book's title and that the link or URL is provided in the context. Never include a link or URL that is not present in the context. If no link is available, simply state: "Apologies, the image or content you are referring to is not available in this context.
93
+
94
+ ### Example:
95
+ **Sources Provided:**
96
+
97
+ **Source 1:**
98
+ - Page number: 10
99
+ - Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/[title]/img_p9_1.png`
100
+ - contoh judul : blue sky, so that the link should be :
101
+ - Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p9_1.png`
102
+ - Text: "The sky is red in the evening and blue in the morning. [p-10]"
103
+
104
+ **Source 2:**
105
+ - Page number: 11
106
+ - Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/[title]/img_p10_1.png`
107
+ - contoh judul : blue sky, so that the link should be :
108
+ - Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p10_1.png`
109
+ - Text: "Water is wet when the sky is red. [p-11]"
110
+
111
+ **Query:** When is water wet?
112
+
113
+ **Answer:**
114
+ Based on the images:\n
115
+ ![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p9_1.png)\n
116
+ [p-10]
117
+ and \n
118
+ ![figure-11](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p10_1.png)\n,
119
+ [p=11]\n
120
+ water is wet when the sky is red in the evening [p-10, p-11].
121
+
122
+ **Sources Provided:**
123
+ Source 1:
124
+
125
+ Page number: 15
126
+ Image URL: https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p14_1.png
127
+ Text: "Plants grow best in blue light but struggle in red light."
128
+ Source 2:
129
+
130
+ Page number: 16
131
+ Image URL: https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p15_1.png
132
+ Text: "Optimal light conditions for plant growth are illustrated in Figure 16."
133
+
134
+ Query:
135
+ What color of light is best for plant growth?
136
+
137
+ Answer:
138
+ When we look in the image :\n
139
+ ![figure-15](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p14_1.png)
140
+ \n[p-15]
141
+
142
+
143
+ Plants grow best under blue light, as shown in the color-coded illustration in the image [p-15].
144
+
145
+ And the optimal light condition will be shown it the figure : \n
146
+
147
+ ![figure-16](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p15_1.png)
148
+ \n[p-16]
149
+ ---
150
+
151
+ **Now, please answer the following query based on the sources provided:**
152
+
153
+ ---
154
+
155
+ **Sources:**
156
+ {context_str}
157
+
158
+ **Query:**
159
+ {query_str}
160
+
161
+ **Answer:**
162
+
163
+ """
164
+
165
  ADDITIONAL_INFORMATIONS = """
166
  Kemudian, kamu menjawab pertanyan user dari buku {titles}, jadi jika user bertaya kamu pastikan akan mengacu buku tersebut yang didapatkan dari tools dari yang kamu punya.
167
  """
 
197
  - "Dapatkan buku ini sekarang dan tingkatkan pemahaman Anda tentang kesehatan 😊"
198
  """
199
 
200
+
201
  SYSTEM_TOPIC_TEMPLATE = """
202
  You are tasked with analyzing a table of contents from a book. Your goal is to identify and extract the main topics and subtopics. Please provide a clear and organized list of these topics and subtopics. The list should reflect the structure and hierarchy presented in the table of contents.
203
  """
 
296
  - **How:** Structure the entries clearly and precisely as attributes of the class.
297
  - **Tip:** Use precise language to capture the relationship between the main topic and subtopic, ensuring clarity and ease of reference for future use.
298
  """
 
299
  SUMMARIZER_SYSTEM_TEMPLATE = """
300
 
301
  """
db/database.py CHANGED
@@ -13,9 +13,6 @@ load_dotenv()
13
 
14
  SQLALCHEMY_DATABASE_URL = MYSQL_CONFIG.DB_URI_SQL_ALCHEMY
15
 
16
- # Get the base64 encoded certificate from the environment variable
17
- ca_cert_base64 = os.getenv("CA_CERT_BASE64")
18
-
19
  # Retrieve the Base64-encoded CA certificate from the environment variable
20
  ca_cert_base64 = os.getenv("CA_CERT_BASE64")
21
 
 
13
 
14
  SQLALCHEMY_DATABASE_URL = MYSQL_CONFIG.DB_URI_SQL_ALCHEMY
15
 
 
 
 
16
  # Retrieve the Base64-encoded CA certificate from the environment variable
17
  ca_cert_base64 = os.getenv("CA_CERT_BASE64")
18
 
db/db.py DELETED
@@ -1,124 +0,0 @@
1
- # Experimental
2
-
3
- from sqlalchemy import Column, String, Enum, ForeignKey, DateTime
4
- from sqlalchemy.dialects.postgresql import UUID, ENUM, JSONB
5
- from sqlalchemy.orm import relationship
6
- from sqlalchemy.sql import func
7
- from enum import Enum
8
- from sqlalchemy.ext.declarative import as_declarative, declared_attr
9
- from llama_index.core.callbacks.schema import CBEventType
10
-
11
-
12
- # Model
13
- @as_declarative()
14
- class Base:
15
- id = Column(UUID, primary_key=True, index=True, default=func.uuid_generate_v4())
16
- created_at = Column(DateTime, server_default=func.now(), nullable=False)
17
- updated_at = Column(
18
- DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
19
- )
20
-
21
- __name__: str
22
-
23
- # Generate __tablename__ automatically
24
- @declared_attr
25
- def __tablename__(cls) -> str:
26
- return cls.__name__.lower()
27
-
28
- # DB
29
- class MessageRoleEnum(str, Enum):
30
- user = "user"
31
- assistant = "assistant"
32
-
33
-
34
- class MessageStatusEnum(str, Enum):
35
- PENDING = "PENDING"
36
- SUCCESS = "SUCCESS"
37
- ERROR = "ERROR"
38
-
39
-
40
- class MessageSubProcessStatusEnum(str, Enum):
41
- PENDING = "PENDING"
42
- FINISHED = "FINISHED"
43
-
44
-
45
- # python doesn't allow enums to be extended, so we have to do this
46
- additional_message_subprocess_fields = {
47
- "CONSTRUCTED_QUERY_ENGINE": "constructed_query_engine",
48
- "SUB_QUESTIONS": "sub_questions",
49
- }
50
- MessageSubProcessSourceEnum = Enum(
51
- "MessageSubProcessSourceEnum",
52
- [(event_type.name, event_type.value) for event_type in CBEventType]
53
- + list(additional_message_subprocess_fields.items()),
54
- )
55
-
56
-
57
- def to_pg_enum(enum_class) -> ENUM:
58
- return ENUM(enum_class, name=enum_class.__name__)
59
-
60
-
61
- class Document(Base):
62
- """
63
- A document along with its metadata
64
- """
65
-
66
- # URL to the actual document (e.g. a PDF)
67
- url = Column(String, nullable=False, unique=True)
68
- metadata_map = Column(JSONB, nullable=True)
69
- conversations = relationship("ConversationDocument", back_populates="document")
70
-
71
-
72
- class Conversation(Base):
73
- """
74
- A conversation with messages and linked documents
75
- """
76
-
77
- messages = relationship("Message", back_populates="conversation")
78
- conversation_documents = relationship(
79
- "ConversationDocument", back_populates="conversation"
80
- )
81
-
82
-
83
- class ConversationDocument(Base):
84
- """
85
- A many-to-many relationship between a conversation and a document
86
- """
87
-
88
- conversation_id = Column(
89
- UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
90
- )
91
- document_id = Column(UUID(as_uuid=True), ForeignKey("document.id"), index=True)
92
- conversation = relationship("Conversation", back_populates="conversation_documents")
93
- document = relationship("Document", back_populates="conversations")
94
-
95
-
96
- class Message(Base):
97
- """
98
- A message in a conversation
99
- """
100
-
101
- conversation_id = Column(
102
- UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
103
- )
104
- content = Column(String)
105
- role = Column(to_pg_enum(MessageRoleEnum))
106
- status = Column(to_pg_enum(MessageStatusEnum), default=MessageStatusEnum.PENDING)
107
- conversation = relationship("Conversation", back_populates="messages")
108
- sub_processes = relationship("MessageSubProcess", back_populates="message")
109
-
110
-
111
- class MessageSubProcess(Base):
112
- """
113
- A record of a sub-process that occurred as part of the generation of a message from an AI assistant
114
- """
115
-
116
- message_id = Column(UUID(as_uuid=True), ForeignKey("message.id"), index=True)
117
- source = Column(to_pg_enum(MessageSubProcessSourceEnum))
118
- message = relationship("Message", back_populates="sub_processes")
119
- status = Column(
120
- to_pg_enum(MessageSubProcessStatusEnum),
121
- default=MessageSubProcessStatusEnum.FINISHED,
122
- nullable=False,
123
- )
124
- metadata_map = Column(JSONB, nullable=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db/delete_data.py DELETED
@@ -1,22 +0,0 @@
1
- import logging
2
- from db.repository import Repository, get_db_conn
3
-
4
- # Setup logging (configure as needed)
5
- logging.basicConfig(level=logging.INFO)
6
-
7
-
8
- class DeleteDatabase(Repository):
9
- async def delete_record(self, params):
10
- if "id" not in params:
11
- raise ValueError("The 'id' parameter is required.")
12
- query = """
13
- DELETE FROM metadata
14
- WHERE id = :id
15
- """
16
-
17
- try:
18
- await self._exec(query, params)
19
- logging.info(f"Record with id {params['id']} deleted successfully.")
20
- except Exception as e:
21
- logging.error(f"Error deleting record with id {params['id']}: {e}")
22
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db/fetching.py CHANGED
@@ -13,7 +13,7 @@ class DataFetching:
13
  user_meta_entries = user_meta_query.get_user_meta_entries(self.db)
14
 
15
  if not user_meta_entries or user_meta_entries==[]:
16
- return {"info": "No book collection found"}
17
 
18
  # Extract relevant data from the user_meta_entries
19
 
@@ -31,8 +31,6 @@ class DataFetching:
31
  for user_meta, metadata, category in user_meta_entries # Unpack the tuple
32
  ]
33
 
34
- print("Hasil akhir ", results)
35
-
36
  # Extract relevant data from the user_meta_entries
37
  return results
38
 
@@ -42,7 +40,6 @@ class DataFetching:
42
 
43
  return [
44
  MetadataResponse(
45
- status="success",
46
  id = id,
47
  title=title,
48
  author=author,
 
13
  user_meta_entries = user_meta_query.get_user_meta_entries(self.db)
14
 
15
  if not user_meta_entries or user_meta_entries==[]:
16
+ return []
17
 
18
  # Extract relevant data from the user_meta_entries
19
 
 
31
  for user_meta, metadata, category in user_meta_entries # Unpack the tuple
32
  ]
33
 
 
 
34
  # Extract relevant data from the user_meta_entries
35
  return results
36
 
 
40
 
41
  return [
42
  MetadataResponse(
 
43
  id = id,
44
  title=title,
45
  author=author,
db/get_data.py DELETED
@@ -1,69 +0,0 @@
1
- import logging
2
- from db.repository import Repository, get_db_conn
3
- from fastapi.responses import JSONResponse
4
-
5
- # Setup logging (configure as needed)
6
- logging.basicConfig(level=logging.INFO)
7
-
8
-
9
- class GetDatabase(Repository):
10
- async def execute_query(self, query, params=None, fetch_one=False):
11
- """
12
-
13
- Helper function to execute SQL queries and handle exceptions.
14
- """
15
- try:
16
- print(fetch_one)
17
- if fetch_one:
18
-
19
- results = await self._fetch_one(query, params)
20
- print(results)
21
- else:
22
- results = await self.get_by_query(query, params)
23
- print("result execute query : ", results)
24
- return results if results else None
25
- except Exception as e:
26
- logging.error(f"An error occurred while executing query: {e}")
27
- return JSONResponse(status_code=500, content=f"An error occurred while executing query: {e}")
28
-
29
- async def get_data(self, title):
30
- """
31
- Fetch the first result matching the given title from the metadata table.
32
- """
33
- query = """
34
- SELECT * FROM metadata
35
- WHERE title = %s
36
- limit 5;
37
- """
38
-
39
- try:
40
- results = await self.execute_query(query, (title,), fetch_one=True)
41
- return results
42
- except Exception as e:
43
- logging.error(f"An error occurred while get data: {e}")
44
- return JSONResponse(status_code=500, content=f"An error occurred while get data: {e}")
45
-
46
- async def get_all_data(self):
47
- """
48
- Fetch all data from the metadata table.
49
- """
50
- query = """
51
- SELECT * FROM metadata
52
- """
53
- results = await self.execute_query(query)
54
- print("result", results)
55
- return results
56
-
57
- async def get_data_by_id(self, id):
58
- query = f"""
59
- SELECT * FROM metadata WHERE id = :id
60
- """
61
-
62
- param = {"id" : id}
63
- try:
64
- results = await self.execute_query(query, param)
65
- print('Query successful, results: %s', results)
66
- return results[0] if results else None
67
- except Exception as e:
68
- print('Error fetching data by ID %s: %s', id, e)
69
- return JSONResponse(status_code=500, content=f"An error while fething data: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db/query/base_query.py CHANGED
@@ -53,9 +53,8 @@ class BaseQuery:
53
  """Delete an entry by ID with optional filter conditions."""
54
  # Build the query to select the entry
55
  query = select(model)
56
- if id :
57
  query = query.where(model.id == id)
58
-
59
  if filter_conditions:
60
  query = query.where(*filter_conditions)
61
 
@@ -65,9 +64,7 @@ class BaseQuery:
65
  return entry
66
 
67
  # Build the delete query
68
- delete_query = delete(model)
69
- if id :
70
- delete_query = delete_query.where(model.id == id)
71
  if filter_conditions:
72
  delete_query = delete_query.where(*filter_conditions)
73
 
 
53
  """Delete an entry by ID with optional filter conditions."""
54
  # Build the query to select the entry
55
  query = select(model)
56
+ if id:
57
  query = query.where(model.id == id)
 
58
  if filter_conditions:
59
  query = query.where(*filter_conditions)
60
 
 
64
  return entry
65
 
66
  # Build the delete query
67
+ delete_query = delete(model).where(model.id == id)
 
 
68
  if filter_conditions:
69
  delete_query = delete_query.where(*filter_conditions)
70
 
db/query/query_book.py CHANGED
@@ -41,7 +41,6 @@ class BookQuery(BaseQuery):
41
  join_conditions=join_conditions,
42
  multiple=True,
43
  )
44
- print("result", result)
45
 
46
  return result
47
 
 
41
  join_conditions=join_conditions,
42
  multiple=True,
43
  )
 
44
 
45
  return result
46
 
db/query/query_user_meta.py CHANGED
@@ -6,7 +6,7 @@ from db.query.base_query import BaseQuery
6
  class UserMetaQuery(BaseQuery):
7
  def __init__(self, user):
8
  super().__init__(user)
9
-
10
  def get_user_meta_entries(self, db):
11
  """Fetch all user meta entries joined with metadata and category."""
12
  join_models = [Metadata, Category]
@@ -14,7 +14,7 @@ class UserMetaQuery(BaseQuery):
14
  User_Meta.metadata_id == Metadata.id,
15
  Metadata.category_id == Category.id,
16
  ]
17
-
18
  filter_conditions = [User_Meta.user_id == self.user_id]
19
 
20
  result = self.get_with_joins(
@@ -23,10 +23,10 @@ class UserMetaQuery(BaseQuery):
23
  join_models=join_models,
24
  join_conditions=join_conditions,
25
  filter_conditions=filter_conditions,
26
- multiple=True,
27
  )
28
  return result
29
-
30
  def insert_user_meta_entries(self, db, metadata_ids):
31
  """Insert new user meta entries if they don't already exist."""
32
  # Fetch existing metadata IDs for the user
@@ -58,14 +58,13 @@ class UserMetaQuery(BaseQuery):
58
  "metadata_ids": new_metadata_ids, # Include only new metadata IDs in the result
59
  }
60
 
 
61
  def update_user_meta_entries(self, db, metadata_ids):
62
  """Update user meta entries: keep, delete, or add new entries based on metadata_ids."""
63
  filter_conditions = [User_Meta.user_id == self.user_id]
64
-
65
  # Fetch existing user meta entries
66
- existing_user_meta = self.get(
67
- db, model=User_Meta, filter_conditions=filter_conditions, multiple=True
68
- )
69
  existing_user_meta = [user_meta[0] for user_meta in existing_user_meta]
70
  existing_meta_ids = [entry.metadata_id for entry in existing_user_meta]
71
 
@@ -80,10 +79,7 @@ class UserMetaQuery(BaseQuery):
80
 
81
  # Delete entries that are no longer in the updated metadata_ids list
82
  if metadata_to_delete:
83
- db.query(User_Meta).filter(
84
- User_Meta.user_id == self.user_id,
85
- User_Meta.metadata_id.in_(metadata_to_delete),
86
- ).delete(synchronize_session=False)
87
 
88
  # Add new entries for metadata that are not in the existing user meta
89
  for meta_id in metadata_to_add:
@@ -91,7 +87,7 @@ class UserMetaQuery(BaseQuery):
91
  self.add(db, new_entry)
92
 
93
  db.commit()
94
-
95
  return {
96
  "status": "success",
97
  "added_meta": list(metadata_to_add),
@@ -101,15 +97,10 @@ class UserMetaQuery(BaseQuery):
101
 
102
  def delete_user_meta(self, db, metadata_id):
103
  """Delete user meta entries by metadata_id."""
104
- filter_conditions = [
105
- User_Meta.metadata_id == metadata_id,
106
- User_Meta.user_id == self.user_id,
107
- ]
108
  self.delete(db, model=User_Meta, filter_conditions=filter_conditions)
109
- return {
110
- "status": "success",
111
- "message": f"Book user with id {metadata_id} deleted successfully.",
112
- }
113
 
114
  def delete_all_user_meta(self, db):
115
  """Delete all user meta entries for a user."""
 
6
  class UserMetaQuery(BaseQuery):
7
  def __init__(self, user):
8
  super().__init__(user)
9
+
10
  def get_user_meta_entries(self, db):
11
  """Fetch all user meta entries joined with metadata and category."""
12
  join_models = [Metadata, Category]
 
14
  User_Meta.metadata_id == Metadata.id,
15
  Metadata.category_id == Category.id,
16
  ]
17
+
18
  filter_conditions = [User_Meta.user_id == self.user_id]
19
 
20
  result = self.get_with_joins(
 
23
  join_models=join_models,
24
  join_conditions=join_conditions,
25
  filter_conditions=filter_conditions,
26
+ multiple=True
27
  )
28
  return result
29
+
30
  def insert_user_meta_entries(self, db, metadata_ids):
31
  """Insert new user meta entries if they don't already exist."""
32
  # Fetch existing metadata IDs for the user
 
58
  "metadata_ids": new_metadata_ids, # Include only new metadata IDs in the result
59
  }
60
 
61
+
62
  def update_user_meta_entries(self, db, metadata_ids):
63
  """Update user meta entries: keep, delete, or add new entries based on metadata_ids."""
64
  filter_conditions = [User_Meta.user_id == self.user_id]
65
+
66
  # Fetch existing user meta entries
67
+ existing_user_meta = self.get(db, model=User_Meta, filter_conditions=filter_conditions, multiple=True)
 
 
68
  existing_user_meta = [user_meta[0] for user_meta in existing_user_meta]
69
  existing_meta_ids = [entry.metadata_id for entry in existing_user_meta]
70
 
 
79
 
80
  # Delete entries that are no longer in the updated metadata_ids list
81
  if metadata_to_delete:
82
+ db.query(User_Meta).filter(User_Meta.user_id == self.user_id, User_Meta.metadata_id.in_(metadata_to_delete)).delete(synchronize_session=False)
 
 
 
83
 
84
  # Add new entries for metadata that are not in the existing user meta
85
  for meta_id in metadata_to_add:
 
87
  self.add(db, new_entry)
88
 
89
  db.commit()
90
+
91
  return {
92
  "status": "success",
93
  "added_meta": list(metadata_to_add),
 
97
 
98
  def delete_user_meta(self, db, metadata_id):
99
  """Delete user meta entries by metadata_id."""
100
+ filter_conditions = [User_Meta.metadata_id==metadata_id,
101
+ User_Meta.user_id==self.user_id]
 
 
102
  self.delete(db, model=User_Meta, filter_conditions=filter_conditions)
103
+ return {"message": f"Book user with id {metadata_id} deleted successfully."}
 
 
 
104
 
105
  def delete_all_user_meta(self, db):
106
  """Delete all user meta entries for a user."""
db/save_data.py DELETED
@@ -1,39 +0,0 @@
1
- from databases import Database
2
- import logging
3
- from dotenv import load_dotenv
4
- from db.repository import Repository
5
-
6
-
7
- load_dotenv()
8
-
9
-
10
- class InsertDatabase(Repository):
11
-
12
- # Example function to insert data asynchronously
13
- async def insert_data(self, params, category_id):
14
- # SQL insert query with named placeholders
15
- query = """
16
- INSERT INTO metadata (title, category_id, author, year, publisher)
17
- VALUES (:title, :category_id, :author, :year, :publisher)
18
- """
19
-
20
- reference = {
21
- "title": params["title"],
22
- "category_id": category_id, # directly assign category_id
23
- "author": params["author"],
24
- "year": params["year"],
25
- "publisher": params["publisher"]
26
- }
27
-
28
-
29
- print(reference)
30
- try:
31
- # Execute the query with the provided values
32
- await self._exec(query, reference)
33
- logging.info(
34
- f"Data inserted successfully: {reference['title']}, {reference['author']}"
35
- )
36
- except Exception as e:
37
- # Log any errors that occur during the database insert operation
38
- logging.error(f"Failed to insert data: {e}")
39
- raise # Re-raise the exception to allow further handling if needed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db/update_data.py DELETED
@@ -1,35 +0,0 @@
1
- import logging
2
- from db.repository import Repository, get_db_conn
3
-
4
- # Setup logging (configure as needed)
5
- logging.basicConfig(level=logging.INFO)
6
-
7
-
8
- class UpdateDatabase(Repository):
9
- async def update_record(self, reference):
10
- print("update record", reference)
11
- if "id" not in reference:
12
- raise ValueError("The 'id' parameter is required.")
13
- query = """
14
- UPDATE metadata
15
- SET title = :title,
16
- category_id = :category_id,
17
- author = :author,
18
- year = :year,
19
- publisher = :publisher
20
- WHERE id = :id
21
- """
22
- print(query)
23
-
24
- print(reference)
25
-
26
- try:
27
- await self._exec(query, reference)
28
- logging.info(
29
- f"Record with id {reference['id']} updated successfully."
30
- )
31
- except Exception as e:
32
- logging.error(
33
- f"Error updating record with id {reference['id']}: {e}"
34
- )
35
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
helper/bot_function.py DELETED
File without changes
helper/db_function.py DELETED
File without changes
requirements.txt CHANGED
@@ -53,12 +53,6 @@ joblib==1.4.2
53
  jose==1.0.0
54
  jsonpatch==1.33
55
  jsonpointer==3.0.0
56
- kubernetes==30.1.0
57
- langchain==0.3.0
58
- langchain-community==0.3.0
59
- langchain-core==0.3.1
60
- langchain-openai==0.2.0
61
- langchain-text-splitters==0.3.0
62
  langchainhub==0.1.21
63
  langfuse==2.48.1
64
  langsmith==0.1.123
 
53
  jose==1.0.0
54
  jsonpatch==1.33
55
  jsonpointer==3.0.0
 
 
 
 
 
 
56
  langchainhub==0.1.21
57
  langfuse==2.48.1
58
  langsmith==0.1.123
research/delete.ipynb CHANGED
@@ -33,7 +33,7 @@
33
  },
34
  {
35
  "cell_type": "code",
36
- "execution_count": 5,
37
  "metadata": {},
38
  "outputs": [
39
  {
@@ -54,17 +54,19 @@
54
  "\n",
55
  "load_dotenv()\n",
56
  "\n",
57
- "api_key = os.getenv(\"PINECONE_API_KEY\")\n",
58
- "\n",
59
  "pc = Pinecone(api_key=api_key)\n",
60
- "index = pc.Index(\"summarizer-semantic-index\")\n",
 
61
  "\n",
62
- "random_vector = [random.uniform(0, 1) for _ in range(1536)]\n",
 
63
  "results = index.query(\n",
64
  " vector=random_vector,\n",
65
  " top_k=10000,\n",
66
  " filter={\n",
67
- " \"title\": {\"$eq\": \"test\"},\n",
68
  " },\n",
69
  ")\n",
70
  "\n",
 
33
  },
34
  {
35
  "cell_type": "code",
36
+ "execution_count": null,
37
  "metadata": {},
38
  "outputs": [
39
  {
 
54
  "\n",
55
  "load_dotenv()\n",
56
  "\n",
57
+ "# api_key = os.getenv(\"PINECONE_API_KEY\")\n",
58
+ "api_key = \"pcsk_aZM8H_P9cK1nfUghBNJfiAhvRM6zgfgiBsHhtJDwydZaXZp47pKSQBFP6J7rmVPwqDYHW\"\n",
59
  "pc = Pinecone(api_key=api_key)\n",
60
+ "# index = pc.Index(\"summarizer-semantic-index\")\n",
61
+ "index = pc.Index(\"multimedika\")\n",
62
  "\n",
63
+ "# random_vector = [random.uniform(0, 1) for _ in range(1536)]\n",
64
+ "random_vector = [random.uniform(0, 1) for _ in range(768)]\n",
65
  "results = index.query(\n",
66
  " vector=random_vector,\n",
67
  " top_k=10000,\n",
68
  " filter={\n",
69
+ " \"url\": {\"$eq\": \"test\"},\n",
70
  " },\n",
71
  ")\n",
72
  "\n",
research/llama_parse.ipynb CHANGED
The diff for this file is too large to render. See raw diff