Spaces:
Sleeping
Sleeping
Commit
·
0767396
1
Parent(s):
d879d77
fix : update code
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +11 -1
- api/events.py +1 -2
- api/function.py +10 -13
- api/router/book.py +23 -23
- api/router/book_collection.py +5 -28
- api/router/bot.py +0 -92
- api/router/bot_general.py +14 -4
- api/router/bot_one.py +3 -2
- api/router/bot_specific.py +2 -1
- api/router/category.py +3 -2
- api/router/testing.py +71 -45
- api/router/topic.py +0 -69
- api/router/user.py +99 -28
- api/util/util.py +0 -0
- app.py +2 -2
- config.py +1 -1
- controller/__init__.py +0 -0
- controller/book_collection_controller.py +0 -0
- controller/book_controller.py +0 -0
- controller/bot_general_controller.py +0 -0
- controller/bot_one_controller.py +0 -0
- controller/bot_specific_controller.py +0 -0
- controller/category_controller.py +0 -0
- controller/user_controller.py +0 -0
- core/book_enabler/__init__.py +0 -0
- core/chat/bot_service.py +25 -3
- core/chat/bot_service_multimodal.py +169 -0
- core/chat/chatstore.py +47 -40
- core/chat/engine.py +45 -14
- core/chat/messaging.py +0 -63
- core/chat/schema.py +0 -162
- core/module_creator/__init__.py +0 -0
- core/multimodal.py +64 -0
- core/parser.py +30 -4
- core/prompt.py +150 -2
- db/database.py +0 -3
- db/db.py +0 -124
- db/delete_data.py +0 -22
- db/fetching.py +1 -4
- db/get_data.py +0 -69
- db/query/base_query.py +2 -5
- db/query/query_book.py +0 -1
- db/query/query_user_meta.py +12 -21
- db/save_data.py +0 -39
- db/update_data.py +0 -35
- helper/bot_function.py +0 -0
- helper/db_function.py +0 -0
- requirements.txt +0 -6
- research/delete.ipynb +8 -6
- research/llama_parse.ipynb +0 -0
.gitignore
CHANGED
@@ -398,4 +398,14 @@ FodyWeavers.xsd
|
|
398 |
*.sln.iml
|
399 |
|
400 |
.env
|
401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
*.sln.iml
|
399 |
|
400 |
.env
|
401 |
+
|
402 |
+
*.pem
|
403 |
+
*.ipynb
|
404 |
+
*.json
|
405 |
+
|
406 |
+
# Ignore directories and specific folders
|
407 |
+
/research/
|
408 |
+
|
409 |
+
# Auto Generated PWA files
|
410 |
+
**/public/sw.js
|
411 |
+
**/public/workbox-*.js
|
api/events.py
CHANGED
@@ -7,8 +7,6 @@ from db.database import engine
|
|
7 |
from db.models import Base
|
8 |
from llama_index.core import set_global_handler
|
9 |
|
10 |
-
|
11 |
-
|
12 |
load_dotenv()
|
13 |
|
14 |
|
@@ -23,6 +21,7 @@ async def startup() -> None:
|
|
23 |
async def shutdown() -> None:
|
24 |
pass
|
25 |
|
|
|
26 |
def register_events(app: FastAPI) -> FastAPI:
|
27 |
app.add_event_handler("startup", startup)
|
28 |
app.add_event_handler("shutdown", shutdown)
|
|
|
7 |
from db.models import Base
|
8 |
from llama_index.core import set_global_handler
|
9 |
|
|
|
|
|
10 |
load_dotenv()
|
11 |
|
12 |
|
|
|
21 |
async def shutdown() -> None:
|
22 |
pass
|
23 |
|
24 |
+
|
25 |
def register_events(app: FastAPI) -> FastAPI:
|
26 |
app.add_event_handler("startup", startup)
|
27 |
app.add_event_handler("shutdown", shutdown)
|
api/function.py
CHANGED
@@ -13,7 +13,7 @@ from script.document_uploader import Uploader
|
|
13 |
from script.vector_db import IndexManager
|
14 |
from service.aws_loader import Loader
|
15 |
from service.dto import BotResponseStreaming
|
16 |
-
|
17 |
|
18 |
load_dotenv()
|
19 |
|
@@ -21,7 +21,7 @@ load_dotenv()
|
|
21 |
logging.basicConfig(level=logging.INFO)
|
22 |
|
23 |
|
24 |
-
async def data_ingestion(reference, file: UploadFile) -> Any:
|
25 |
try:
|
26 |
# Assuming you have a Langfuse callback handler
|
27 |
langfuse_callback_handler = LlamaIndexCallbackHandler()
|
@@ -29,33 +29,31 @@ async def data_ingestion(reference, file: UploadFile) -> Any:
|
|
29 |
user_id="admin_book_uploaded",
|
30 |
)
|
31 |
|
32 |
-
uploader = Uploader(reference, file)
|
33 |
nodes_with_metadata, file_stream = await uploader.process_documents()
|
|
|
|
|
|
|
34 |
|
35 |
# Build indexes using IndexManager
|
36 |
index = IndexManager()
|
37 |
index.build_indexes(nodes_with_metadata)
|
38 |
-
|
39 |
|
40 |
-
#
|
41 |
file_name = f"{reference['title']}"
|
42 |
aws_loader = Loader()
|
43 |
|
44 |
-
# file_obj = file
|
45 |
aws_loader.upload_to_s3(file_stream, file_name)
|
46 |
|
47 |
-
|
48 |
return json.dumps(
|
49 |
{"status": "success", "message": "Vector Index loaded successfully."}
|
50 |
)
|
51 |
|
52 |
except Exception as e:
|
53 |
-
# Log the error
|
54 |
logging.error("An error occurred in data ingestion: %s", e)
|
55 |
-
|
56 |
-
|
57 |
-
content="An internal server error occurred in data ingestion.",
|
58 |
-
)
|
59 |
|
60 |
async def generate_streaming_completion(user_request, session_id):
|
61 |
try:
|
@@ -64,7 +62,6 @@ async def generate_streaming_completion(user_request, session_id):
|
|
64 |
|
65 |
# Load existing indexes
|
66 |
index = index_manager.load_existing_indexes()
|
67 |
-
|
68 |
# Retrieve the chat engine with the loaded index
|
69 |
chat_engine = engine.get_chat_engine(index, session_id)
|
70 |
# Generate completion response
|
|
|
13 |
from script.vector_db import IndexManager
|
14 |
from service.aws_loader import Loader
|
15 |
from service.dto import BotResponseStreaming
|
16 |
+
from utils.error_handlers import handle_exception
|
17 |
|
18 |
load_dotenv()
|
19 |
|
|
|
21 |
logging.basicConfig(level=logging.INFO)
|
22 |
|
23 |
|
24 |
+
async def data_ingestion(reference, file: UploadFile, lang: str = "en") -> Any:
|
25 |
try:
|
26 |
# Assuming you have a Langfuse callback handler
|
27 |
langfuse_callback_handler = LlamaIndexCallbackHandler()
|
|
|
29 |
user_id="admin_book_uploaded",
|
30 |
)
|
31 |
|
32 |
+
uploader = Uploader(reference, file, lang)
|
33 |
nodes_with_metadata, file_stream = await uploader.process_documents()
|
34 |
+
|
35 |
+
if isinstance(nodes_with_metadata, JSONResponse):
|
36 |
+
return nodes_with_metadata # Return the error response directly
|
37 |
|
38 |
# Build indexes using IndexManager
|
39 |
index = IndexManager()
|
40 |
index.build_indexes(nodes_with_metadata)
|
|
|
41 |
|
42 |
+
# Upload AWS
|
43 |
file_name = f"{reference['title']}"
|
44 |
aws_loader = Loader()
|
45 |
|
|
|
46 |
aws_loader.upload_to_s3(file_stream, file_name)
|
47 |
|
|
|
48 |
return json.dumps(
|
49 |
{"status": "success", "message": "Vector Index loaded successfully."}
|
50 |
)
|
51 |
|
52 |
except Exception as e:
|
53 |
+
# Log the error
|
54 |
logging.error("An error occurred in data ingestion: %s", e)
|
55 |
+
# Use handle_exception for structured error handling
|
56 |
+
return handle_exception(e)
|
|
|
|
|
57 |
|
58 |
async def generate_streaming_completion(user_request, session_id):
|
59 |
try:
|
|
|
62 |
|
63 |
# Load existing indexes
|
64 |
index = index_manager.load_existing_indexes()
|
|
|
65 |
# Retrieve the chat engine with the loaded index
|
66 |
chat_engine = engine.get_chat_engine(index, session_id)
|
67 |
# Generate completion response
|
api/router/book.py
CHANGED
@@ -18,6 +18,7 @@ from config import MYSQL_CONFIG
|
|
18 |
from utils.error_handlers import handle_exception
|
19 |
from script.vector_db import IndexManager
|
20 |
from service.dto import MetadataResponse
|
|
|
21 |
from sqlalchemy.orm import Session
|
22 |
from sqlalchemy.future import select
|
23 |
|
@@ -37,9 +38,7 @@ async def get_metadata(user: user_dependency, db: db_dependency):
|
|
37 |
try:
|
38 |
# Join Metadata with Category to get the category name
|
39 |
fetching = DataFetching(user, db)
|
40 |
-
# print(fetching)
|
41 |
metadata_fetching = fetching.metadata_fetching()
|
42 |
-
# print(metadata_fetching)
|
43 |
|
44 |
# Transform results into MetadataResponse model with optional thumbnail handling
|
45 |
return metadata_fetching
|
@@ -58,13 +57,15 @@ async def upload_file(
|
|
58 |
year: int = Form(...),
|
59 |
publisher: str = Form(...),
|
60 |
file: UploadFile = File(...),
|
|
|
61 |
thumbnail: Optional[UploadFile] = File(None),
|
62 |
):
|
63 |
auth_response = check_admin_authentication(user)
|
64 |
if auth_response:
|
65 |
return auth_response
|
66 |
-
|
67 |
|
|
|
|
|
68 |
|
69 |
# Query the category based on category_id
|
70 |
category_query = CategoryQuery(user)
|
@@ -80,28 +81,32 @@ async def upload_file(
|
|
80 |
}
|
81 |
|
82 |
# Process the file and handle data ingestion
|
83 |
-
response = await data_ingestion(reference, file)
|
|
|
|
|
|
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
# Create a new Metadata object
|
86 |
book_query = BookQuery(user)
|
87 |
book_query.add_book(db, title, author, category_id, year, publisher)
|
88 |
logging.info("Database Inserted")
|
89 |
|
90 |
return {
|
91 |
-
"status": "success",
|
92 |
"filename": file.filename,
|
93 |
"response": response,
|
94 |
"info": "upload file successfully",
|
95 |
}
|
96 |
|
97 |
except Exception as e:
|
98 |
-
return
|
99 |
-
|
100 |
-
"filename": "",
|
101 |
-
"response": "",
|
102 |
-
"info": "upload file failed",
|
103 |
-
"error_message":handle_exception(e)
|
104 |
-
}
|
105 |
|
106 |
@router.put("/book/{metadata_id}")
|
107 |
async def update_metadata(
|
@@ -153,6 +158,7 @@ async def update_metadata(
|
|
153 |
|
154 |
# Update existing metadata entry
|
155 |
metadata = db.query(Metadata).filter(Metadata.id == metadata_id).first()
|
|
|
156 |
|
157 |
if not metadata:
|
158 |
return JSONResponse(status_code=404, content="Metadata not found")
|
@@ -160,10 +166,11 @@ async def update_metadata(
|
|
160 |
updated_metadata = book_query.update_metadata_entry(
|
161 |
db, metadata_id, title, author, category_id, year, publisher
|
162 |
)
|
|
|
163 |
updated_category = category_query.get_category(db, updated_metadata.category_id)
|
|
|
164 |
|
165 |
return MetadataResponse(
|
166 |
-
status="success",
|
167 |
id=metadata_id,
|
168 |
title=updated_metadata.title,
|
169 |
author=updated_metadata.author,
|
@@ -179,10 +186,7 @@ async def update_metadata(
|
|
179 |
)
|
180 |
|
181 |
except Exception as e:
|
182 |
-
return
|
183 |
-
"status":"error",
|
184 |
-
"error_message":handle_exception(e)
|
185 |
-
}
|
186 |
|
187 |
@router.delete("/book/{metadata_id}")
|
188 |
async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id: int):
|
@@ -203,12 +207,8 @@ async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id:
|
|
203 |
db.delete(metadata)
|
204 |
db.commit()
|
205 |
|
206 |
-
return {"
|
207 |
|
208 |
except Exception as e:
|
209 |
-
return
|
210 |
-
"status": "error",
|
211 |
-
"message": "delete failed",
|
212 |
-
"error_message": handle_exception(e)
|
213 |
-
}
|
214 |
|
|
|
18 |
from utils.error_handlers import handle_exception
|
19 |
from script.vector_db import IndexManager
|
20 |
from service.dto import MetadataResponse
|
21 |
+
from service.aws_loader import Loader
|
22 |
from sqlalchemy.orm import Session
|
23 |
from sqlalchemy.future import select
|
24 |
|
|
|
38 |
try:
|
39 |
# Join Metadata with Category to get the category name
|
40 |
fetching = DataFetching(user, db)
|
|
|
41 |
metadata_fetching = fetching.metadata_fetching()
|
|
|
42 |
|
43 |
# Transform results into MetadataResponse model with optional thumbnail handling
|
44 |
return metadata_fetching
|
|
|
57 |
year: int = Form(...),
|
58 |
publisher: str = Form(...),
|
59 |
file: UploadFile = File(...),
|
60 |
+
lang: str = Form(None),
|
61 |
thumbnail: Optional[UploadFile] = File(None),
|
62 |
):
|
63 |
auth_response = check_admin_authentication(user)
|
64 |
if auth_response:
|
65 |
return auth_response
|
|
|
66 |
|
67 |
+
# Restrict `lang` to only "id" or "en"
|
68 |
+
lang = lang if lang in {"id", "en"} else "en"
|
69 |
|
70 |
# Query the category based on category_id
|
71 |
category_query = CategoryQuery(user)
|
|
|
81 |
}
|
82 |
|
83 |
# Process the file and handle data ingestion
|
84 |
+
response = await data_ingestion(reference, file, lang)
|
85 |
+
|
86 |
+
if isinstance(response, JSONResponse):
|
87 |
+
return response # Return the error response directly
|
88 |
|
89 |
+
if thumbnail:
|
90 |
+
file_name = f"{reference['title']}"
|
91 |
+
aws_loader = Loader()
|
92 |
+
ekstensi_file = file.filename.split(".")[-1].lower()
|
93 |
+
aws_loader.upload_image_to_s3(file=thumbnail, custom_name=f"{file_name}.{ekstensi_file}")
|
94 |
+
|
95 |
+
|
96 |
# Create a new Metadata object
|
97 |
book_query = BookQuery(user)
|
98 |
book_query.add_book(db, title, author, category_id, year, publisher)
|
99 |
logging.info("Database Inserted")
|
100 |
|
101 |
return {
|
|
|
102 |
"filename": file.filename,
|
103 |
"response": response,
|
104 |
"info": "upload file successfully",
|
105 |
}
|
106 |
|
107 |
except Exception as e:
|
108 |
+
return handle_exception(e)
|
109 |
+
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
@router.put("/book/{metadata_id}")
|
112 |
async def update_metadata(
|
|
|
158 |
|
159 |
# Update existing metadata entry
|
160 |
metadata = db.query(Metadata).filter(Metadata.id == metadata_id).first()
|
161 |
+
print(metadata)
|
162 |
|
163 |
if not metadata:
|
164 |
return JSONResponse(status_code=404, content="Metadata not found")
|
|
|
166 |
updated_metadata = book_query.update_metadata_entry(
|
167 |
db, metadata_id, title, author, category_id, year, publisher
|
168 |
)
|
169 |
+
print(updated_metadata)
|
170 |
updated_category = category_query.get_category(db, updated_metadata.category_id)
|
171 |
+
print(updated_category)
|
172 |
|
173 |
return MetadataResponse(
|
|
|
174 |
id=metadata_id,
|
175 |
title=updated_metadata.title,
|
176 |
author=updated_metadata.author,
|
|
|
186 |
)
|
187 |
|
188 |
except Exception as e:
|
189 |
+
return handle_exception(e)
|
|
|
|
|
|
|
190 |
|
191 |
@router.delete("/book/{metadata_id}")
|
192 |
async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id: int):
|
|
|
207 |
db.delete(metadata)
|
208 |
db.commit()
|
209 |
|
210 |
+
return {"Status": "delete successfully"}
|
211 |
|
212 |
except Exception as e:
|
213 |
+
return handle_exception(e)
|
|
|
|
|
|
|
|
|
214 |
|
api/router/book_collection.py
CHANGED
@@ -30,11 +30,7 @@ async def get_book_collection(user: user_dependency, db: db_dependency):
|
|
30 |
"book_collection": book_collection,
|
31 |
}
|
32 |
except Exception as e:
|
33 |
-
return
|
34 |
-
"status": "error",
|
35 |
-
"book_collection": [],
|
36 |
-
"error_message": handle_exception(e)
|
37 |
-
}
|
38 |
|
39 |
|
40 |
@router.post("/book_collection")
|
@@ -52,12 +48,7 @@ async def request_book_collection(
|
|
52 |
return user_meta_query.insert_user_meta_entries(db, metadata_ids)
|
53 |
|
54 |
except Exception as e:
|
55 |
-
return
|
56 |
-
"status": "error",
|
57 |
-
"message": "User meta entries failed to added.",
|
58 |
-
"metadata_ids": [], # Include the metadata IDs in the result
|
59 |
-
"error_message":handle_exception(e)
|
60 |
-
}
|
61 |
|
62 |
|
63 |
@router.put("/book_collection")
|
@@ -74,13 +65,7 @@ async def update_book_collection(
|
|
74 |
return user_meta_query.update_user_meta_entries(db, metadata_ids)
|
75 |
|
76 |
except Exception as e:
|
77 |
-
return
|
78 |
-
"status": "error",
|
79 |
-
"added_meta": [],
|
80 |
-
"deleted_meta": [],
|
81 |
-
"kept_meta": [],
|
82 |
-
"message":handle_exception(e)
|
83 |
-
}
|
84 |
|
85 |
|
86 |
@router.delete("/book_collection/{metadata_id}")
|
@@ -98,11 +83,7 @@ async def delete_book_collection(
|
|
98 |
db, metadata_id=metadata_id
|
99 |
)
|
100 |
except Exception as e:
|
101 |
-
return
|
102 |
-
"status": "error",
|
103 |
-
"message": f"Book user with id {metadata_id} deleted successfully.",
|
104 |
-
"error_message": handle_exception(e)
|
105 |
-
}
|
106 |
|
107 |
|
108 |
@router.delete("/all_collections")
|
@@ -121,8 +102,4 @@ async def delete_all_book(user: user_dependency, db: db_dependency):
|
|
121 |
"message": f"Deleted book collection for user {user.get('id')}",
|
122 |
}
|
123 |
except Exception as e:
|
124 |
-
return
|
125 |
-
"status": "error",
|
126 |
-
"message": f"Delete failed for user {user.get('id')}",
|
127 |
-
"error_message": handle_exception(e)
|
128 |
-
}
|
|
|
30 |
"book_collection": book_collection,
|
31 |
}
|
32 |
except Exception as e:
|
33 |
+
return handle_exception(e)
|
|
|
|
|
|
|
|
|
34 |
|
35 |
|
36 |
@router.post("/book_collection")
|
|
|
48 |
return user_meta_query.insert_user_meta_entries(db, metadata_ids)
|
49 |
|
50 |
except Exception as e:
|
51 |
+
return handle_exception(e)
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
|
54 |
@router.put("/book_collection")
|
|
|
65 |
return user_meta_query.update_user_meta_entries(db, metadata_ids)
|
66 |
|
67 |
except Exception as e:
|
68 |
+
return handle_exception(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
|
71 |
@router.delete("/book_collection/{metadata_id}")
|
|
|
83 |
db, metadata_id=metadata_id
|
84 |
)
|
85 |
except Exception as e:
|
86 |
+
return handle_exception(e)
|
|
|
|
|
|
|
|
|
87 |
|
88 |
|
89 |
@router.delete("/all_collections")
|
|
|
102 |
"message": f"Deleted book collection for user {user.get('id')}",
|
103 |
}
|
104 |
except Exception as e:
|
105 |
+
return handle_exception(e)
|
|
|
|
|
|
|
|
api/router/bot.py
DELETED
@@ -1,92 +0,0 @@
|
|
1 |
-
from fastapi import APIRouter, HTTPException, Depends
|
2 |
-
from service.dto import UserPromptRequest, BotResponse
|
3 |
-
from core.chat.chatstore import ChatStore
|
4 |
-
|
5 |
-
from api.function import (
|
6 |
-
generate_streaming_completion,
|
7 |
-
generate_completion_non_streaming,
|
8 |
-
)
|
9 |
-
from sse_starlette.sse import EventSourceResponse
|
10 |
-
from utils.utils import generate_uuid
|
11 |
-
|
12 |
-
router = APIRouter(tags=["Bot"])
|
13 |
-
|
14 |
-
def get_chat_store():
|
15 |
-
return ChatStore()
|
16 |
-
|
17 |
-
@router.post("/bot/new")
|
18 |
-
async def create_new_session():
|
19 |
-
session_id = generate_uuid()
|
20 |
-
return {"session_id" : session_id}
|
21 |
-
|
22 |
-
@router.get("/bot/{session_id}")
|
23 |
-
async def get_session_id(session_id: str, chat_store: ChatStore = Depends(get_chat_store)):
|
24 |
-
chat_history = chat_store.get_messages(session_id)
|
25 |
-
|
26 |
-
if not chat_history:
|
27 |
-
raise HTTPException(status_code=404, detail="Session not found or empty.")
|
28 |
-
|
29 |
-
return chat_history
|
30 |
-
|
31 |
-
@router.get("/bot")
|
32 |
-
async def get_all_session_ids():
|
33 |
-
try:
|
34 |
-
chat_store = ChatStore()
|
35 |
-
all_keys = chat_store.get_keys()
|
36 |
-
print(all_keys)
|
37 |
-
return all_keys
|
38 |
-
except Exception as e:
|
39 |
-
# Log the error and raise HTTPException for FastAPI
|
40 |
-
print(f"An error occurred in update data.: {e}")
|
41 |
-
raise HTTPException(
|
42 |
-
status_code=400, detail="the error when get all session ids"
|
43 |
-
)
|
44 |
-
|
45 |
-
|
46 |
-
@router.post("/bot/{session_id}")
|
47 |
-
async def bot_generator_general(user_prompt_request: UserPromptRequest):
|
48 |
-
|
49 |
-
if user_prompt_request.streaming:
|
50 |
-
return EventSourceResponse(
|
51 |
-
generate_streaming_completion(
|
52 |
-
user_prompt_request.prompt, user_prompt_request.streaming
|
53 |
-
)
|
54 |
-
)
|
55 |
-
else:
|
56 |
-
response, raw_references, references, metadata, scores = (
|
57 |
-
generate_completion_non_streaming(
|
58 |
-
user_prompt_request.session_id, user_prompt_request.prompt, user_prompt_request.streaming
|
59 |
-
)
|
60 |
-
)
|
61 |
-
|
62 |
-
return BotResponse(
|
63 |
-
content=response,
|
64 |
-
raw_references=raw_references,
|
65 |
-
references=references,
|
66 |
-
metadata=metadata,
|
67 |
-
scores=scores,
|
68 |
-
)
|
69 |
-
|
70 |
-
|
71 |
-
@router.post("/bot/{category_id}/{title}") #Ganti router
|
72 |
-
async def bot_generator_spesific(
|
73 |
-
category_id: int, title: str, user_prompt_request: UserPromptRequest
|
74 |
-
):
|
75 |
-
pass
|
76 |
-
|
77 |
-
@router.delete("/bot/{session_id}")
|
78 |
-
async def delete_bot(session_id: str, chat_store: ChatStore = Depends(get_chat_store)):
|
79 |
-
try:
|
80 |
-
chat_store.delete_messages(session_id)
|
81 |
-
return {"info": f"Delete {session_id} successful"}
|
82 |
-
except Exception as e:
|
83 |
-
# Log the error and raise HTTPException for FastAPI
|
84 |
-
print(f"An error occurred in update data.: {e}")
|
85 |
-
raise HTTPException(
|
86 |
-
status_code=400, detail="the error when deleting message"
|
87 |
-
)
|
88 |
-
|
89 |
-
|
90 |
-
@router.get("/bot/{category_id}/{title}")
|
91 |
-
async def get_favourite_data(category_id: int, title: str, human_template):
|
92 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/router/bot_general.py
CHANGED
@@ -27,7 +27,10 @@ def get_chat_store():
|
|
27 |
|
28 |
|
29 |
@router.post("/bot_general/new")
|
30 |
-
async def create_session_general():
|
|
|
|
|
|
|
31 |
session_id = generate_uuid()
|
32 |
return {"session_id": session_id}
|
33 |
|
@@ -54,9 +57,12 @@ async def get_session_id(
|
|
54 |
|
55 |
@router.post("/bot/{session_id}")
|
56 |
async def bot_generator_general(
|
57 |
-
session_id: str, user_prompt_request: UserPromptRequest
|
58 |
):
|
59 |
-
|
|
|
|
|
|
|
60 |
langfuse_callback_handler = LlamaIndexCallbackHandler()
|
61 |
langfuse_callback_handler.set_trace_params(user_id="guest", session_id=session_id)
|
62 |
|
@@ -77,8 +83,12 @@ async def bot_generator_general(
|
|
77 |
|
78 |
@router.delete("/bot/{session_id}")
|
79 |
async def delete_bot(
|
80 |
-
db: db_dependency, session_id: str, chat_store: ChatStore = Depends(get_chat_store)
|
81 |
):
|
|
|
|
|
|
|
|
|
82 |
try:
|
83 |
chat_store.delete_messages(session_id)
|
84 |
# Delete session from database
|
|
|
27 |
|
28 |
|
29 |
@router.post("/bot_general/new")
|
30 |
+
async def create_session_general(user: user_dependency):
|
31 |
+
auth_response = check_user_authentication(user)
|
32 |
+
if auth_response:
|
33 |
+
return auth_response
|
34 |
session_id = generate_uuid()
|
35 |
return {"session_id": session_id}
|
36 |
|
|
|
57 |
|
58 |
@router.post("/bot/{session_id}")
|
59 |
async def bot_generator_general(
|
60 |
+
user: user_dependency,session_id: str, user_prompt_request: UserPromptRequest
|
61 |
):
|
62 |
+
auth_response = check_user_authentication(user)
|
63 |
+
if auth_response:
|
64 |
+
return auth_response
|
65 |
+
|
66 |
langfuse_callback_handler = LlamaIndexCallbackHandler()
|
67 |
langfuse_callback_handler.set_trace_params(user_id="guest", session_id=session_id)
|
68 |
|
|
|
83 |
|
84 |
@router.delete("/bot/{session_id}")
|
85 |
async def delete_bot(
|
86 |
+
user: user_dependency,db: db_dependency, session_id: str, chat_store: ChatStore = Depends(get_chat_store)
|
87 |
):
|
88 |
+
auth_response = check_user_authentication(user)
|
89 |
+
if auth_response:
|
90 |
+
return auth_response
|
91 |
+
|
92 |
try:
|
93 |
chat_store.delete_messages(session_id)
|
94 |
# Delete session from database
|
api/router/bot_one.py
CHANGED
@@ -6,7 +6,8 @@ import pytz
|
|
6 |
from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
|
7 |
from datetime import datetime
|
8 |
from core.chat.chatstore import ChatStore
|
9 |
-
from core.chat.bot_service import ChatCompletionService
|
|
|
10 |
from db.database import get_db
|
11 |
from db.models import Session_Publisher
|
12 |
from db.query.query_book import BookQuery
|
@@ -139,7 +140,7 @@ async def get_all_session_bot_one(
|
|
139 |
"id": session.id,
|
140 |
"bot_name": session.bot_name,
|
141 |
"updated_at": str(session.updated_at),
|
142 |
-
"last_message": chat_store.get_last_message_mongodb(session.id)
|
143 |
}
|
144 |
for session in sessions
|
145 |
]
|
|
|
6 |
from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
|
7 |
from datetime import datetime
|
8 |
from core.chat.chatstore import ChatStore
|
9 |
+
# from core.chat.bot_service import ChatCompletionService
|
10 |
+
from core.chat.bot_service_multimodal import ChatCompletionService
|
11 |
from db.database import get_db
|
12 |
from db.models import Session_Publisher
|
13 |
from db.query.query_book import BookQuery
|
|
|
140 |
"id": session.id,
|
141 |
"bot_name": session.bot_name,
|
142 |
"updated_at": str(session.updated_at),
|
143 |
+
"last_message": chat_store.get_last_message_mongodb(session.id)
|
144 |
}
|
145 |
for session in sessions
|
146 |
]
|
api/router/bot_specific.py
CHANGED
@@ -10,7 +10,8 @@ from sqlalchemy.exc import SQLAlchemyError, NoResultFound
|
|
10 |
|
11 |
from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
|
12 |
from core.chat.chatstore import ChatStore
|
13 |
-
from core.chat.bot_service import ChatCompletionService
|
|
|
14 |
from db.database import get_db
|
15 |
from db.models import Bot_Meta, Bot, Metadata
|
16 |
from db.models import Session as SessionModel
|
|
|
10 |
|
11 |
from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
|
12 |
from core.chat.chatstore import ChatStore
|
13 |
+
# from core.chat.bot_service import ChatCompletionService
|
14 |
+
from core.chat.bot_service_multimodal import ChatCompletionService
|
15 |
from db.database import get_db
|
16 |
from db.models import Bot_Meta, Bot, Metadata
|
17 |
from db.models import Session as SessionModel
|
api/router/category.py
CHANGED
@@ -85,8 +85,9 @@ async def create_category(user: user_dependency, db: db_dependency, category: Ca
|
|
85 |
# Check if category already exists
|
86 |
category_query = CategoryQuery(user)
|
87 |
existing_category = category_query.get_existing_category(db, category.category_name)
|
88 |
-
|
89 |
-
|
|
|
90 |
|
91 |
# Add category
|
92 |
category_query.add_category(db, category.category_name)
|
|
|
85 |
# Check if category already exists
|
86 |
category_query = CategoryQuery(user)
|
87 |
existing_category = category_query.get_existing_category(db, category.category_name)
|
88 |
+
print(existing_category)
|
89 |
+
if existing_category: # Check if the category already exists
|
90 |
+
return JSONResponse(status_code=400, content={"error": "Category already exists"})
|
91 |
|
92 |
# Add category
|
93 |
category_query.add_category(db, category.category_name)
|
api/router/testing.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException,
|
2 |
-
from fastapi.security import OAuth2PasswordBearer
|
3 |
import httpx
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
@@ -9,63 +8,88 @@ load_dotenv()
|
|
9 |
|
10 |
app = FastAPI()
|
11 |
|
12 |
-
# Bearer token for API
|
13 |
BEARER_TOKEN = os.getenv("MEDUCINE_API_BEARER_TOKEN")
|
14 |
-
|
15 |
-
# Base URL for the Meducine API
|
16 |
BASE_URL = os.getenv("BASE_URL")
|
17 |
|
18 |
-
|
19 |
-
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/login")
|
20 |
-
|
21 |
-
@app.post("/login")
|
22 |
async def login(email: str = Form(...), password: str = Form(...)):
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
return handle_response(response) # Assuming this function formats the response correctly
|
32 |
-
except httpx.HTTPStatusError as e:
|
33 |
-
raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
|
34 |
-
except Exception as e:
|
35 |
-
raise HTTPException(status_code=500, detail=str(e))
|
36 |
-
|
37 |
-
@app.post("/actions/meducine-restapi/auth/logout")
|
38 |
-
async def logout(email: str = Form(...), password: str = Form(...)):
|
39 |
-
async with httpx.AsyncClient() as client:
|
40 |
-
response = await client.post(
|
41 |
-
f"{BASE_URL}/actions/meducine-restapi/auth/logout",
|
42 |
data={"email": email, "password": password},
|
43 |
-
|
44 |
)
|
45 |
-
|
|
|
|
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
)
|
54 |
-
|
|
|
|
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
61 |
params={"feature": feature},
|
62 |
-
|
63 |
)
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
def handle_response(response: httpx.Response):
|
67 |
"""
|
68 |
-
Handles the response
|
69 |
"""
|
70 |
if response.status_code in range(200, 300):
|
71 |
return response.json() # Successful request
|
@@ -76,6 +100,8 @@ def handle_response(response: httpx.Response):
|
|
76 |
else:
|
77 |
raise HTTPException(status_code=500, detail="Unexpected error")
|
78 |
|
|
|
|
|
79 |
# Run the application
|
80 |
if __name__ == "__main__":
|
81 |
import uvicorn
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException, Form
|
|
|
2 |
import httpx
|
3 |
import os
|
4 |
from dotenv import load_dotenv
|
|
|
8 |
|
9 |
app = FastAPI()
|
10 |
|
11 |
+
# Bearer token and base URL for external API (from environment variables)
|
12 |
BEARER_TOKEN = os.getenv("MEDUCINE_API_BEARER_TOKEN")
|
|
|
|
|
13 |
BASE_URL = os.getenv("BASE_URL")
|
14 |
|
15 |
+
@app.post("/actions/meducine-restapi/auth/login")
|
|
|
|
|
|
|
16 |
async def login(email: str = Form(...), password: str = Form(...)):
|
17 |
+
"""
|
18 |
+
Handles login by sending a request to the external API with the static Bearer token.
|
19 |
+
Even though it simulates a login, it uses the static Bearer token for authentication.
|
20 |
+
"""
|
21 |
+
try:
|
22 |
+
# Send login request (simulates login but uses static Bearer token)
|
23 |
+
response = await make_request(
|
24 |
+
url=f"{BASE_URL}/actions/meducine-restapi/auth/login",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
data={"email": email, "password": password},
|
26 |
+
method="POST"
|
27 |
)
|
28 |
+
return response
|
29 |
+
except Exception as e:
|
30 |
+
raise HTTPException(status_code=500, detail=f"Login failed: {str(e)}")
|
31 |
|
32 |
+
|
33 |
+
@app.post("/actions/meducine-restapi/auth/logout")
|
34 |
+
async def logout(email: str = Form(...)):
|
35 |
+
"""
|
36 |
+
Handles logout using the static Bearer token.
|
37 |
+
"""
|
38 |
+
try:
|
39 |
+
# Simulates logging out but uses the static Bearer token
|
40 |
+
response = await make_request(
|
41 |
+
url=f"{BASE_URL}/actions/meducine-restapi/auth/logout",
|
42 |
+
data={"email": email},
|
43 |
+
method="POST"
|
44 |
)
|
45 |
+
return {"message": "Logout successful", "response": response}
|
46 |
+
except Exception as e:
|
47 |
+
raise HTTPException(status_code=500, detail=f"Logout failed: {str(e)}")
|
48 |
|
49 |
+
|
50 |
+
@app.post("/meducine-restapi/user/has-premium-access")
|
51 |
+
async def check_premium_access(feature: str, email: str = Form(...), password: str = Form(...)):
|
52 |
+
"""
|
53 |
+
Checks if the user has premium access to a feature, using the static Bearer token for authentication.
|
54 |
+
"""
|
55 |
+
try:
|
56 |
+
response = await make_request(
|
57 |
+
url=f"{BASE_URL}/actions/meducine-restapi/user/has-premium-access",
|
58 |
+
data={"email": email, "password": password},
|
59 |
params={"feature": feature},
|
60 |
+
method="POST"
|
61 |
)
|
62 |
+
return response
|
63 |
+
except Exception as e:
|
64 |
+
raise HTTPException(status_code=500, detail=f"Premium access check failed: {str(e)}")
|
65 |
+
|
66 |
+
|
67 |
+
async def make_request(url: str, data: dict = None, method: str = "GET", params: dict = None):
|
68 |
+
"""
|
69 |
+
Helper function to make an HTTP request to the external API with the static Bearer token.
|
70 |
+
"""
|
71 |
+
headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
|
72 |
+
|
73 |
+
async with httpx.AsyncClient() as client:
|
74 |
+
try:
|
75 |
+
if method == "POST":
|
76 |
+
response = await client.post(url, data=data, params=params, headers=headers)
|
77 |
+
elif method == "GET":
|
78 |
+
response = await client.get(url, params=params, headers=headers)
|
79 |
+
else:
|
80 |
+
raise HTTPException(status_code=405, detail="Method not allowed")
|
81 |
+
|
82 |
+
response.raise_for_status() # Raise exception for 4xx or 5xx errors
|
83 |
+
return handle_response(response)
|
84 |
+
except httpx.HTTPStatusError as e:
|
85 |
+
raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
|
86 |
+
except Exception as e:
|
87 |
+
raise HTTPException(status_code=500, detail=str(e))
|
88 |
+
|
89 |
|
90 |
def handle_response(response: httpx.Response):
|
91 |
"""
|
92 |
+
Handles the API response, returning JSON data or raising exceptions based on status codes.
|
93 |
"""
|
94 |
if response.status_code in range(200, 300):
|
95 |
return response.json() # Successful request
|
|
|
100 |
else:
|
101 |
raise HTTPException(status_code=500, detail="Unexpected error")
|
102 |
|
103 |
+
|
104 |
+
|
105 |
# Run the application
|
106 |
if __name__ == "__main__":
|
107 |
import uvicorn
|
api/router/topic.py
DELETED
@@ -1,69 +0,0 @@
|
|
1 |
-
from fastapi import Form, APIRouter, File, UploadFile, HTTPException, Request
|
2 |
-
from db.repository import get_db_conn
|
3 |
-
from db.get_data import GetDatabase
|
4 |
-
from db.save_data import InsertDatabase
|
5 |
-
from config import MYSQL_CONFIG
|
6 |
-
from api.function import data_ingestion, get_data, delete_data, update_data
|
7 |
-
from script.vector_db import IndexManager
|
8 |
-
from service.dto import MetadataRequest
|
9 |
-
|
10 |
-
router = APIRouter(tags=["Topics"])
|
11 |
-
|
12 |
-
db_conn = get_db_conn(MYSQL_CONFIG)
|
13 |
-
get_database = GetDatabase(db_conn)
|
14 |
-
index_manager = IndexManager()
|
15 |
-
|
16 |
-
|
17 |
-
@router.post("/topic")
|
18 |
-
async def upload_file(
|
19 |
-
title: str = Form(...),
|
20 |
-
author: str = Form(...),
|
21 |
-
category: str = Form(...),
|
22 |
-
year: int = Form(...),
|
23 |
-
publisher: str = Form(...),
|
24 |
-
file: UploadFile = File(...),
|
25 |
-
# content_table: UploadFile = File(...)
|
26 |
-
):
|
27 |
-
|
28 |
-
reference = {
|
29 |
-
"title": title,
|
30 |
-
"author": author,
|
31 |
-
"category": category,
|
32 |
-
"year": year,
|
33 |
-
"publisher": publisher,
|
34 |
-
}
|
35 |
-
|
36 |
-
# response = await data_ingestion(db_conn, reference, file, content_table)
|
37 |
-
response = await data_ingestion(db_conn, reference, file)
|
38 |
-
return {"filename": file.filename, "response": response}
|
39 |
-
|
40 |
-
|
41 |
-
@router.get("/topic")
|
42 |
-
async def get_metadata():
|
43 |
-
results = await get_data(db_conn)
|
44 |
-
return results
|
45 |
-
|
46 |
-
|
47 |
-
@router.put("/topic/{id}")
|
48 |
-
async def update_metadata(id: int, reference: MetadataRequest):
|
49 |
-
try :
|
50 |
-
old_reference = await get_database.get_data_by_id(id)
|
51 |
-
index_manager.update_vector_database(old_reference, reference)
|
52 |
-
|
53 |
-
return await update_data(id, reference, db_conn)
|
54 |
-
except Exception as e:
|
55 |
-
raise HTTPException(status_code=500, detail="An error occurred while updating metadata")
|
56 |
-
|
57 |
-
|
58 |
-
@router.delete("/topic/{id}")
|
59 |
-
async def delete_metadata(id: int):
|
60 |
-
try:
|
61 |
-
old_reference = await get_database.get_data_by_id(id)
|
62 |
-
index_manager.delete_vector_database(old_reference)
|
63 |
-
|
64 |
-
return await delete_data(id, db_conn)
|
65 |
-
|
66 |
-
except Exception as e:
|
67 |
-
print(e)
|
68 |
-
raise HTTPException(status_code=500, detail="An error occurred while delete metadata")
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/router/user.py
CHANGED
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
1 |
from datetime import timedelta
|
2 |
from typing import Annotated
|
3 |
|
@@ -12,6 +15,12 @@ from db.database import get_db
|
|
12 |
from api.auth import get_current_user, create_access_token
|
13 |
from service.dto import CreateUserRequest, UserVerification, Token
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
router = APIRouter(tags=["User"])
|
17 |
|
@@ -23,19 +32,82 @@ user_dependency = Annotated[dict, Depends(get_current_user)]
|
|
23 |
ACCESS_TOKEN_EXPIRE_MINUTES = 43200
|
24 |
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
@router.post("/login", response_model=Token)
|
27 |
async def login_for_access_token(
|
28 |
login_data: Annotated[OAuth2PasswordRequestForm, Depends()],
|
29 |
db: Session = Depends(get_db),
|
30 |
):
|
31 |
-
|
32 |
-
|
33 |
-
if
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
try:
|
41 |
access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
|
@@ -52,7 +124,7 @@ async def login_for_access_token(
|
|
52 |
|
53 |
except Exception as e:
|
54 |
print(e)
|
55 |
-
return JSONResponse(status_code=500, content="An error
|
56 |
|
57 |
|
58 |
@router.get("/login", response_model=dict)
|
@@ -90,7 +162,6 @@ async def get_all_users(user: user_dependency, db: Session = Depends(get_db)):
|
|
90 |
]
|
91 |
|
92 |
|
93 |
-
@router.post("/register")
|
94 |
async def register_user(db: db_dependency, create_user_request: CreateUserRequest):
|
95 |
existing_user = (
|
96 |
db.query(User).filter(User.email == create_user_request.email).first()
|
@@ -122,27 +193,27 @@ async def register_user(db: db_dependency, create_user_request: CreateUserReques
|
|
122 |
)
|
123 |
|
124 |
|
125 |
-
@router.post("/forgot_password")
|
126 |
-
async def forget_password():
|
127 |
-
|
128 |
|
129 |
|
130 |
-
@router.post("/change_password")
|
131 |
-
async def change_password(
|
132 |
-
|
133 |
-
):
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
|
148 |
-
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
from datetime import timedelta
|
5 |
from typing import Annotated
|
6 |
|
|
|
15 |
from api.auth import get_current_user, create_access_token
|
16 |
from service.dto import CreateUserRequest, UserVerification, Token
|
17 |
|
18 |
+
from collections import Counter
|
19 |
+
from time import time
|
20 |
+
|
21 |
+
|
22 |
+
load_dotenv()
|
23 |
+
|
24 |
|
25 |
router = APIRouter(tags=["User"])
|
26 |
|
|
|
32 |
ACCESS_TOKEN_EXPIRE_MINUTES = 43200
|
33 |
|
34 |
|
35 |
+
# Rate-limiting config
|
36 |
+
FAILED_ATTEMPT_LIMIT = 3
|
37 |
+
BLOCK_TIME_SECONDS = 300 # Block for 5 minutes
|
38 |
+
|
39 |
+
# In-memory tracking for failed attempts
|
40 |
+
failed_attempts = Counter()
|
41 |
+
blocked_users = {}
|
42 |
+
|
43 |
+
|
44 |
@router.post("/login", response_model=Token)
|
45 |
async def login_for_access_token(
|
46 |
login_data: Annotated[OAuth2PasswordRequestForm, Depends()],
|
47 |
db: Session = Depends(get_db),
|
48 |
):
|
49 |
+
username = login_data.username
|
50 |
+
|
51 |
+
# Check if user is blocked
|
52 |
+
if username in blocked_users:
|
53 |
+
block_until = blocked_users[username]
|
54 |
+
if time() < block_until:
|
55 |
+
return JSONResponse(
|
56 |
+
status_code=status.HTTP_403_FORBIDDEN,
|
57 |
+
content=f"Too many failed attempts. Try again after {int(block_until - time())} seconds.",
|
58 |
+
)
|
59 |
+
|
60 |
+
else:
|
61 |
+
# Unblock the user after the time period
|
62 |
+
del blocked_users[username]
|
63 |
+
del failed_attempts[username]
|
64 |
+
|
65 |
+
user = db.query(User).filter(User.username == username).first()
|
66 |
+
|
67 |
+
if not user:
|
68 |
+
# Automatically register the user
|
69 |
+
create_user_request = CreateUserRequest(
|
70 |
+
name=login_data.username,
|
71 |
+
username=login_data.username,
|
72 |
+
email=login_data.username,
|
73 |
+
password=os.getenv("USER_PASSWORD"), # Replace with a generated or temporary password
|
74 |
+
role_id=2,
|
75 |
+
)
|
76 |
+
registration_response = await register_user(db, create_user_request)
|
77 |
+
|
78 |
+
if isinstance(registration_response, JSONResponse):
|
79 |
+
return registration_response # Return error response if registration failed
|
80 |
+
|
81 |
+
# Retrieve the newly created user after successful registration
|
82 |
+
user = db.query(User).filter(User.username == username).first()
|
83 |
+
|
84 |
+
if not user:
|
85 |
+
return JSONResponse(
|
86 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
87 |
+
content="User registration failed unexpectedly."
|
88 |
+
)
|
89 |
+
|
90 |
+
correct_password = (
|
91 |
+
bcrypt_context.verify(os.getenv("USER_PASSWORD"), user.password_hash) or
|
92 |
+
bcrypt_context.verify(login_data.password, user.password_hash)
|
93 |
)
|
94 |
+
|
95 |
+
if not correct_password :
|
96 |
+
failed_attempts[username] = failed_attempts.get(username, 0) + 1
|
97 |
+
if failed_attempts[username] >= FAILED_ATTEMPT_LIMIT:
|
98 |
+
blocked_users[username] = time() + BLOCK_TIME_SECONDS
|
99 |
+
failed_attempts.pop(username, None) # Reset after blocking
|
100 |
+
return JSONResponse(
|
101 |
+
status_code=status.HTTP_403_FORBIDDEN,
|
102 |
+
content="Too many failed attempts. You are temporarily blocked."
|
103 |
+
)
|
104 |
+
|
105 |
+
return JSONResponse(
|
106 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
107 |
+
content="Invalid credentials."
|
108 |
+
)
|
109 |
+
|
110 |
+
failed_attempts.pop(username, None)
|
111 |
|
112 |
try:
|
113 |
access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
|
|
|
124 |
|
125 |
except Exception as e:
|
126 |
print(e)
|
127 |
+
return JSONResponse(status_code=500, content="An error occurred during login")
|
128 |
|
129 |
|
130 |
@router.get("/login", response_model=dict)
|
|
|
162 |
]
|
163 |
|
164 |
|
|
|
165 |
async def register_user(db: db_dependency, create_user_request: CreateUserRequest):
|
166 |
existing_user = (
|
167 |
db.query(User).filter(User.email == create_user_request.email).first()
|
|
|
193 |
)
|
194 |
|
195 |
|
196 |
+
# @router.post("/forgot_password")
|
197 |
+
# async def forget_password():
|
198 |
+
# pass
|
199 |
|
200 |
|
201 |
+
# @router.post("/change_password")
|
202 |
+
# async def change_password(
|
203 |
+
# user: user_dependency, db: db_dependency, user_verification: UserVerification
|
204 |
+
# ):
|
205 |
+
# if user is None:
|
206 |
+
# return JSONResponse(status_code=401, content="Authentication Failed")
|
207 |
+
# user_model = db.query(User).filter(User.id == user.get("id")).first()
|
208 |
|
209 |
+
# if not bcrypt_context.verify(
|
210 |
+
# user_verification.password, user_model.hashed_password
|
211 |
+
# ):
|
212 |
+
# return JSONResponse(status_code=401, content="Error on password change")
|
213 |
|
214 |
+
# user_model.hashed_password = bcrypt_context.hash(user_verification.new_password)
|
215 |
+
# db.add(user_model)
|
216 |
+
# db.commit()
|
217 |
+
# db.refresh(user_model)
|
218 |
|
219 |
+
# return {"message": "User's password successfully changed", "user_id": user_model.id}
|
api/util/util.py
DELETED
File without changes
|
app.py
CHANGED
@@ -23,7 +23,7 @@ def create_instance() -> FastAPI:
|
|
23 |
def add_middleware(app: FastAPI) -> FastAPI:
|
24 |
app.add_middleware(
|
25 |
CORSMiddleware,
|
26 |
-
allow_origins=["
|
27 |
allow_credentials=True,
|
28 |
allow_methods=["*"],
|
29 |
allow_headers=["*"],
|
@@ -48,7 +48,7 @@ def register_routers(app: FastAPI) -> FastAPI:
|
|
48 |
app.include_router(health.router)
|
49 |
|
50 |
return app
|
51 |
-
|
52 |
|
53 |
def init_app() -> FastAPI:
|
54 |
app: FastAPI = pipe(
|
|
|
23 |
def add_middleware(app: FastAPI) -> FastAPI:
|
24 |
app.add_middleware(
|
25 |
CORSMiddleware,
|
26 |
+
allow_origins=["*"],
|
27 |
allow_credentials=True,
|
28 |
allow_methods=["*"],
|
29 |
allow_headers=["*"],
|
|
|
48 |
app.include_router(health.router)
|
49 |
|
50 |
return app
|
51 |
+
|
52 |
|
53 |
def init_app() -> FastAPI:
|
54 |
app: FastAPI = pipe(
|
config.py
CHANGED
@@ -30,7 +30,7 @@ class PineconeConfig(BaseSettings):
|
|
30 |
class GPTBotConfig(BaseSettings):
|
31 |
temperature : float = 0.3
|
32 |
model : str = "gpt-4o-mini"
|
33 |
-
max_tokens : int =
|
34 |
streaming : bool = False
|
35 |
api_key : str = os.environ.get("OPENAI_API_KEY")
|
36 |
|
|
|
30 |
class GPTBotConfig(BaseSettings):
|
31 |
temperature : float = 0.3
|
32 |
model : str = "gpt-4o-mini"
|
33 |
+
max_tokens : int = 4096
|
34 |
streaming : bool = False
|
35 |
api_key : str = os.environ.get("OPENAI_API_KEY")
|
36 |
|
controller/__init__.py
DELETED
File without changes
|
controller/book_collection_controller.py
DELETED
File without changes
|
controller/book_controller.py
DELETED
File without changes
|
controller/bot_general_controller.py
DELETED
File without changes
|
controller/bot_one_controller.py
DELETED
File without changes
|
controller/bot_specific_controller.py
DELETED
File without changes
|
controller/category_controller.py
DELETED
File without changes
|
controller/user_controller.py
DELETED
File without changes
|
core/book_enabler/__init__.py
DELETED
File without changes
|
core/chat/bot_service.py
CHANGED
@@ -69,7 +69,6 @@ class ChatCompletionService:
|
|
69 |
|
70 |
if self.type_bot == "general":
|
71 |
response = redesign_structure_message(response, metadata_collection)
|
72 |
-
print(response)
|
73 |
|
74 |
# Save the message to chat store
|
75 |
self._store_message_in_chatstore(response, metadata_collection)
|
@@ -136,6 +135,30 @@ class ChatCompletionService:
|
|
136 |
print("No sources available")
|
137 |
|
138 |
return contents, metadata_collection, scores
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
def _attach_contents_to_metadata(self, contents, metadata_collection):
|
141 |
for i in range(min(len(contents), len(metadata_collection))):
|
@@ -171,5 +194,4 @@ class ChatCompletionService:
|
|
171 |
|
172 |
db = self.client["bot_database"] # Replace with your database name
|
173 |
collection = db[self.session_id] # Replace with your collection name
|
174 |
-
|
175 |
-
print("Data inserted with record ids", result.inserted_ids)
|
|
|
69 |
|
70 |
if self.type_bot == "general":
|
71 |
response = redesign_structure_message(response, metadata_collection)
|
|
|
72 |
|
73 |
# Save the message to chat store
|
74 |
self._store_message_in_chatstore(response, metadata_collection)
|
|
|
135 |
print("No sources available")
|
136 |
|
137 |
return contents, metadata_collection, scores
|
138 |
+
|
139 |
+
# def _process_sources_images(self, sources, number_reference_sorted):
|
140 |
+
# contents, metadata_collection, scores = [], [], []
|
141 |
+
# if not number_reference_sorted:
|
142 |
+
# print("There are no references")
|
143 |
+
# return contents, metadata_collection, scores
|
144 |
+
|
145 |
+
# for number in range (len(sources)):
|
146 |
+
# number = int(number)
|
147 |
+
# if sources and len(sources) > 0:
|
148 |
+
# node = dict(sources[0])["raw_output"].source_nodes
|
149 |
+
# if 0 <= number - 1 < len(node):
|
150 |
+
# content = node[number - 1].node.get_text()
|
151 |
+
# contents.append(content)
|
152 |
+
# metadata = dict(node[number - 1].node.metadata)
|
153 |
+
# metadata_collection.append(metadata)
|
154 |
+
# score = node[number - 1].score
|
155 |
+
# scores.append(score)
|
156 |
+
# else:
|
157 |
+
# print(f"Invalid reference number: {number}")
|
158 |
+
# else:
|
159 |
+
# print("No sources available")
|
160 |
+
|
161 |
+
# return contents, metadata_collection, scores
|
162 |
|
163 |
def _attach_contents_to_metadata(self, contents, metadata_collection):
|
164 |
for i in range(min(len(contents), len(metadata_collection))):
|
|
|
194 |
|
195 |
db = self.client["bot_database"] # Replace with your database name
|
196 |
collection = db[self.session_id] # Replace with your collection name
|
197 |
+
collection.insert_many(chat_history_json)
|
|
core/chat/bot_service_multimodal.py
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import re
|
3 |
+
import os
|
4 |
+
import pytz
|
5 |
+
|
6 |
+
from typing import List
|
7 |
+
from datetime import datetime
|
8 |
+
from datetime import timedelta
|
9 |
+
from fastapi.responses import JSONResponse
|
10 |
+
from script.vector_db import IndexManager
|
11 |
+
from llama_index.core.llms import MessageRole
|
12 |
+
|
13 |
+
from core.chat.engine import Engine
|
14 |
+
from core.chat.chatstore import ChatStore
|
15 |
+
from core.parser import (
|
16 |
+
filter_metadata_by_pages,
|
17 |
+
extract_sorted_page_numbers
|
18 |
+
|
19 |
+
)
|
20 |
+
|
21 |
+
from service.dto import ChatMessage
|
22 |
+
from pymongo.mongo_client import MongoClient
|
23 |
+
|
24 |
+
|
25 |
+
class ChatCompletionService:
    """
    Run one chat turn for a session and persist the assistant's answer.

    The service loads the existing indexes, asks the chat engine for a reply,
    collects text/metadata/score from the reply's source nodes, stores the
    reply in the chat store and, for the ``"specific"`` bot type, also writes
    the user/assistant pair to MongoDB.
    """

    def __init__(
        self,
        session_id: str,
        user_request: str,
        titles: List = None,
        type_bot: str = "general",
    ):
        """
        Args:
            session_id: Chat session key; also used as the MongoDB collection name.
            user_request: The user's message for this turn.
            titles: Passed to the engine for non-general bots.
            type_bot: ``"general"`` or another bot type such as ``"specific"``.
        """
        self.session_id = session_id
        self.user_request = user_request
        self.titles = titles
        self.type_bot = type_bot
        self.client = MongoClient(os.getenv("MONGO_URI"))
        self.engine = Engine()
        self.index_manager = IndexManager()
        self.chatstore = ChatStore()

    def generate_completion(self):
        """
        Produce the assistant reply for ``self.user_request``.

        Returns:
            ``(response_text, metadata_collection, scores)`` on success, or a
            ``JSONResponse`` with status 500 when MongoDB is unreachable or
            any step raises.
        """
        if not self._ping_mongo():
            return JSONResponse(
                status_code=500, content="Database Error: Unable to connect to MongoDB"
            )

        try:
            # Load and retrieve chat engine with appropriate index
            index = self.index_manager.load_existing_indexes()
            chat_engine = self._get_chat_engine(index)

            # Generate chat response
            response = chat_engine.chat(self.user_request)

            sources = response.source_nodes

            contents, metadata_collection, scores = self._process_sources_images(sources)

            # From here on only the text of the reply is needed.
            response = str(response)

            # Add contents to metadata
            metadata_collection = self._attach_contents_to_metadata(
                contents, metadata_collection
            )

            # NOTE(review): metadata is filtered by the pages cited in the
            # reply but ``scores`` is not, so the two lists may no longer line
            # up index-for-index afterwards - confirm what callers expect.
            page_sources = extract_sorted_page_numbers(response)
            metadata_collection = filter_metadata_by_pages(metadata_collection, page_sources)

            # Save the message to chat store
            self._store_message_in_chatstore(response, metadata_collection)

        except Exception as e:
            logging.error(f"An error occurred in generate text: {e}")
            return JSONResponse(
                status_code=500, content=f"An internal server error occurred: {e}"
            )

        try:
            if self.type_bot == "specific":
                self._save_chat_history_to_db(response, metadata_collection)

            return response, metadata_collection, scores

        except Exception as e:
            logging.error(f"An error occurred while saving chat history: {e}")
            return JSONResponse(
                status_code=500,
                content=f"An internal server error occurred while saving chat history: {e}",
            )

    def _ping_mongo(self):
        """Return True when the MongoDB ``ping`` command succeeds, else False."""
        try:
            self.client.admin.command("ping")
            # Logged rather than printed, matching the error paths of this class.
            logging.info("Pinged your deployment. Successfully connected to MongoDB!")
            return True
        except Exception as e:
            logging.error(f"MongoDB connection failed: {e}")
            return False

    def _get_chat_engine(self, index):
        """Return the chat engine for this session; non-general bots also pass titles and bot type."""
        if self.type_bot == "general":
            return self.engine.get_chat_engine(self.session_id, index)
        return self.engine.get_chat_engine(
            self.session_id, index, self.titles, self.type_bot
        )

    def _extract_sorted_references(self, response):
        """
        Return the distinct ``[n]`` citation numbers found in ``response``.

        The numbers are returned as strings, ordered by numeric value so that
        ``"2"`` comes before ``"10"``.
        """
        number_reference = set(re.findall(r"\[(\d+)\]", str(response)))
        return sorted(number_reference, key=int)

    def _process_sources_images(self, sources):
        """
        Collect text, metadata and score from every source node.

        Args:
            sources: Source nodes of the chat response; each item exposes
                ``.node.get_text()``, ``.node.metadata`` and ``.score``.

        Returns:
            ``(contents, metadata_collection, scores)`` - three parallel lists
            in the same order as ``sources``; all empty when there are no sources.
        """
        contents, metadata_collection, scores = [], [], []

        if not sources:
            logging.info("No sources available")
            return contents, metadata_collection, scores

        # Iterate the nodes directly: indexing with ``number - 1`` over a
        # 0-based range started at the last node and rotated all three lists.
        for source in sources:
            contents.append(source.node.get_text())
            metadata_collection.append(dict(source.node.metadata))
            scores.append(source.score)

        return contents, metadata_collection, scores

    def _attach_contents_to_metadata(self, contents, metadata_collection):
        """Store each text under the ``"content"`` key of the metadata dict at the same index."""
        for metadata, content in zip(metadata_collection, contents):
            metadata["content"] = content
        return metadata_collection

    def _store_message_in_chatstore(self, response, metadata_collection):
        """Replace the last chat-store message of the session with the assistant reply plus metadata."""
        message = ChatMessage(
            role=MessageRole.ASSISTANT,
            content=response,
            metadata=metadata_collection,
        )
        self.chatstore.delete_last_message(self.session_id)
        self.chatstore.add_message(self.session_id, message)
        self.chatstore.clean_message(self.session_id)

    def _save_chat_history_to_db(self, response, metadata_collection):
        """
        Insert the user request and the assistant reply into MongoDB.

        Both documents go into the ``bot_database`` collection named after the
        session id. Timestamps use the Asia/Jakarta timezone.
        """
        jakarta_tz = pytz.timezone("Asia/Jakarta")
        time_now = datetime.now(jakarta_tz)
        # Back-date the user message slightly so it sorts before the reply.
        user_timestamp = time_now - timedelta(seconds=0.2)
        payment = "free" if self.type_bot == "general" else None
        chat_history_db = [
            ChatMessage(
                role=MessageRole.USER,
                content=self.user_request,
                timestamp=user_timestamp,
                payment=payment,
            ),
            ChatMessage(
                role=MessageRole.ASSISTANT,
                content=response,
                metadata=metadata_collection,
                timestamp=time_now,
                payment=payment,
            ),
        ]

        chat_history_json = [message.model_dump() for message in chat_history_db]

        db = self.client["bot_database"]
        collection = db[self.session_id]
        collection.insert_many(chat_history_json)
|
core/chat/chatstore.py
CHANGED
@@ -1,6 +1,10 @@
|
|
1 |
import redis
|
2 |
import os
|
3 |
import json
|
|
|
|
|
|
|
|
|
4 |
from fastapi.responses import JSONResponse
|
5 |
from typing import Optional, List, Dict
|
6 |
from llama_index.storage.chat_store.redis import RedisChatStore
|
@@ -8,44 +12,50 @@ from pymongo.mongo_client import MongoClient
|
|
8 |
from llama_index.core.memory import ChatMemoryBuffer
|
9 |
from service.dto import ChatMessage
|
10 |
|
|
|
|
|
11 |
|
12 |
class ChatStore:
|
13 |
def __init__(self):
|
14 |
self.redis_client = redis.Redis(
|
15 |
-
host="redis-10365.c244.us-east-1-2.ec2.redns.redis-cloud.com",
|
16 |
-
|
17 |
-
|
|
|
|
|
18 |
)
|
19 |
|
20 |
uri = os.getenv("MONGO_URI")
|
21 |
self.client = MongoClient(uri)
|
22 |
|
23 |
def initialize_memory_bot(self, session_id):
|
24 |
-
|
|
|
|
|
25 |
chat_store = RedisChatStore(
|
26 |
redis_client=self.redis_client, ttl=86400 # Time-to-live set for 1 hour
|
27 |
)
|
28 |
-
|
29 |
db = self.client["bot_database"]
|
30 |
-
|
31 |
-
if
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
#
|
|
|
40 |
memory = ChatMemoryBuffer.from_defaults(
|
41 |
token_limit=3000, chat_store=chat_store, chat_store_key=session_id
|
42 |
)
|
43 |
else:
|
44 |
-
#
|
45 |
memory = ChatMemoryBuffer.from_defaults(
|
46 |
token_limit=3000, chat_store=chat_store, chat_store_key=session_id
|
47 |
)
|
48 |
-
|
49 |
return memory
|
50 |
|
51 |
def get_messages(self, session_id: str) -> List[dict]:
|
@@ -56,14 +66,14 @@ class ChatStore:
|
|
56 |
|
57 |
# Decode and parse each item into a dictionary
|
58 |
return [json.loads(m.decode("utf-8")) for m in items]
|
59 |
-
|
60 |
def get_last_message(self, session_id: str) -> Optional[Dict]:
|
61 |
"""Get the last message for a session_id."""
|
62 |
last_message = self.redis_client.lindex(session_id, -1)
|
63 |
-
|
64 |
if last_message is None:
|
65 |
return None # Return None if there are no messages
|
66 |
-
|
67 |
# Decode and parse the last message into a dictionary
|
68 |
return json.loads(last_message.decode("utf-8"))
|
69 |
|
@@ -73,11 +83,13 @@ class ChatStore:
|
|
73 |
|
74 |
# Get the last document by sorting by _id in descending order
|
75 |
last_document = collection.find().sort("_id", -1).limit(1)
|
76 |
-
|
|
|
77 |
for doc in last_document:
|
78 |
-
doc
|
79 |
-
|
80 |
-
|
|
|
81 |
|
82 |
def delete_last_message(self, session_id: str) -> Optional[ChatMessage]:
|
83 |
"""Delete last message for a session_id."""
|
@@ -113,21 +125,23 @@ class ChatStore:
|
|
113 |
def get_keys(self) -> List[str]:
|
114 |
"""Get all keys."""
|
115 |
try:
|
116 |
-
print(self.redis_client.keys("*"))
|
117 |
return [key.decode("utf-8") for key in self.redis_client.keys("*")]
|
118 |
|
119 |
except Exception as e:
|
120 |
-
# Log the error and return JSONResponse for FastAPI
|
121 |
-
print(f"An error occurred in update data.: {e}")
|
122 |
return JSONResponse(status_code=400, content="the error when get keys")
|
123 |
|
124 |
-
def add_message(self, session_id: str, message: ChatMessage) -> None:
|
125 |
"""Add a message for a session_id."""
|
126 |
item = json.dumps(self._message_to_dict(message))
|
127 |
self.redis_client.rpush(session_id, item)
|
128 |
|
129 |
-
def _message_to_dict(self, message: ChatMessage) -> dict:
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
def add_chat_history_to_redis(self, session_id: str) -> None:
|
133 |
"""Fetch chat history from MongoDB and add it to Redis."""
|
@@ -169,18 +183,11 @@ class ChatStore:
|
|
169 |
|
170 |
# Convert the cursor to a list and exclude the _id field
|
171 |
documents_list = [
|
172 |
-
{key: doc[key] for key in doc if key !=
|
173 |
for doc in documents
|
174 |
]
|
175 |
|
176 |
-
# Print the list of documents without the _id field
|
177 |
-
print(documents_list) # Optional: If you want to see the output
|
178 |
-
|
179 |
return documents_list
|
180 |
-
|
181 |
except Exception as e:
|
182 |
-
|
183 |
-
return JSONResponse(
|
184 |
-
status_code=500,
|
185 |
-
content=f"An error occurred while retrieving messages: {e}",
|
186 |
-
)
|
|
|
1 |
import redis
|
2 |
import os
|
3 |
import json
|
4 |
+
|
5 |
+
from datetime import datetime
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
from fastapi.responses import JSONResponse
|
9 |
from typing import Optional, List, Dict
|
10 |
from llama_index.storage.chat_store.redis import RedisChatStore
|
|
|
12 |
from llama_index.core.memory import ChatMemoryBuffer
|
13 |
from service.dto import ChatMessage
|
14 |
|
15 |
+
load_dotenv()
|
16 |
+
|
17 |
|
18 |
class ChatStore:
|
19 |
def __init__(self):
|
20 |
self.redis_client = redis.Redis(
|
21 |
+
# host="redis-10365.c244.us-east-1-2.ec2.redns.redis-cloud.com",
|
22 |
+
host = os.getenv("REDIS_HOST"),
|
23 |
+
port=os.getenv("REDIS_PORT"),
|
24 |
+
username = os.getenv("REDIS_USERNAME"),
|
25 |
+
password=os.getenv("REDIS_PASSWORD"),
|
26 |
)
|
27 |
|
28 |
uri = os.getenv("MONGO_URI")
|
29 |
self.client = MongoClient(uri)
|
30 |
|
31 |
def initialize_memory_bot(self, session_id):
    """
    Build the Redis-backed chat memory buffer for ``session_id``.

    When the session key is missing from Redis but a MongoDB collection with
    the same name exists in ``bot_database``, the stored history is first
    copied into Redis via ``add_chat_history_to_redis``.

    Args:
        session_id: Redis key / MongoDB collection name of the chat session.

    Returns:
        A ``ChatMemoryBuffer`` with a 3000-token limit keyed on ``session_id``.
    """
    chat_store = RedisChatStore(
        redis_client=self.redis_client, ttl=86400  # Time-to-live: 24 hours (86400 s)
    )

    # Ask Redis about this single key rather than listing the whole keyspace
    # with KEYS, which scans every key on the server.
    if not self.redis_client.exists(session_id):
        db = self.client["bot_database"]
        if session_id in db.list_collection_names():
            # Session is known to MongoDB only: seed Redis with its history.
            self.add_chat_history_to_redis(session_id)

    # The buffer is configured identically whether the history was already in
    # Redis, was just restored from MongoDB, or does not exist yet.
    return ChatMemoryBuffer.from_defaults(
        token_limit=3000, chat_store=chat_store, chat_store_key=session_id
    )
|
60 |
|
61 |
def get_messages(self, session_id: str) -> List[dict]:
|
|
|
66 |
|
67 |
# Decode and parse each item into a dictionary
|
68 |
return [json.loads(m.decode("utf-8")) for m in items]
|
69 |
+
|
70 |
def get_last_message(self, session_id: str) -> Optional[Dict]:
|
71 |
"""Get the last message for a session_id."""
|
72 |
last_message = self.redis_client.lindex(session_id, -1)
|
73 |
+
|
74 |
if last_message is None:
|
75 |
return None # Return None if there are no messages
|
76 |
+
|
77 |
# Decode and parse the last message into a dictionary
|
78 |
return json.loads(last_message.decode("utf-8"))
|
79 |
|
|
|
83 |
|
84 |
# Get the last document by sorting by _id in descending order
|
85 |
last_document = collection.find().sort("_id", -1).limit(1)
|
86 |
+
|
87 |
+
# Iterasi last_document dan kembalikan isi content jika ada
|
88 |
for doc in last_document:
|
89 |
+
return str(doc.get('content', "")) # kembalikan content atau string kosong jika tidak ada
|
90 |
+
|
91 |
+
# Jika tidak ada dokumen, kembalikan string kosong
|
92 |
+
return ""
|
93 |
|
94 |
def delete_last_message(self, session_id: str) -> Optional[ChatMessage]:
|
95 |
"""Delete last message for a session_id."""
|
|
|
125 |
def get_keys(self) -> List[str]:
|
126 |
"""Get all keys."""
|
127 |
try:
|
|
|
128 |
return [key.decode("utf-8") for key in self.redis_client.keys("*")]
|
129 |
|
130 |
except Exception as e:
|
|
|
|
|
131 |
return JSONResponse(status_code=400, content="the error when get keys")
|
132 |
|
133 |
+
def add_message(self, session_id: str, message: Optional[ChatMessage]) -> None:
|
134 |
"""Add a message for a session_id."""
|
135 |
item = json.dumps(self._message_to_dict(message))
|
136 |
self.redis_client.rpush(session_id, item)
|
137 |
|
138 |
+
def _message_to_dict(self, message: Optional[ChatMessage]) -> dict:
|
139 |
+
# Convert the ChatMessage instance into a dictionary with necessary adjustments
|
140 |
+
message_dict = message.model_dump()
|
141 |
+
# Convert any datetime fields to ISO format, if needed
|
142 |
+
if isinstance(message_dict.get('timestamp'), datetime):
|
143 |
+
message_dict['timestamp'] = message_dict['timestamp'].isoformat()
|
144 |
+
return message_dict
|
145 |
|
146 |
def add_chat_history_to_redis(self, session_id: str) -> None:
|
147 |
"""Fetch chat history from MongoDB and add it to Redis."""
|
|
|
183 |
|
184 |
# Convert the cursor to a list and exclude the _id field
|
185 |
documents_list = [
|
186 |
+
{key: doc[key] for key in doc if key !="_id" and doc[key] is not None}
|
187 |
for doc in documents
|
188 |
]
|
189 |
|
|
|
|
|
|
|
190 |
return documents_list
|
191 |
+
|
192 |
except Exception as e:
|
193 |
+
return JSONResponse(status_code=500, content=f"An error occurred while retrieving messages: {e}")
|
|
|
|
|
|
|
|
core/chat/engine.py
CHANGED
@@ -8,11 +8,14 @@ from llama_index.core.tools import QueryEngineTool, ToolMetadata
|
|
8 |
from llama_index.agent.openai import OpenAIAgent
|
9 |
from llama_index.llms.openai import OpenAI
|
10 |
from llama_index.core.query_engine import CitationQueryEngine
|
|
|
|
|
11 |
from llama_index.core import Settings
|
12 |
from core.chat.chatstore import ChatStore
|
|
|
13 |
|
14 |
from config import GPTBOT_CONFIG
|
15 |
-
from core.prompt import SYSTEM_BOT_TEMPLATE, ADDITIONAL_INFORMATIONS
|
16 |
from core.parser import join_list
|
17 |
|
18 |
|
@@ -27,8 +30,11 @@ class Engine:
|
|
27 |
|
28 |
self.chat_store = ChatStore()
|
29 |
Settings.llm = self.llm
|
|
|
|
|
30 |
|
31 |
def get_citation_engine(self, titles:List, index):
|
|
|
32 |
filters = [
|
33 |
MetadataFilter(
|
34 |
key="title",
|
@@ -41,41 +47,66 @@ class Engine:
|
|
41 |
filters = MetadataFilters(filters=filters, condition="or")
|
42 |
|
43 |
# Create the QueryEngineTool with the index and filters
|
44 |
-
kwargs = {"similarity_top_k":
|
45 |
|
46 |
retriever = index.as_retriever(**kwargs)
|
47 |
|
48 |
# citation_engine = CitationQueryEngine(retriever=retriever)
|
49 |
|
50 |
-
return CitationQueryEngine.from_args(index, retriever=retriever)
|
|
|
|
|
51 |
|
52 |
def get_chat_engine(self, session_id, index, titles=None, type_bot="general"):
|
53 |
# Create the QueryEngineTool based on the type
|
54 |
if type_bot == "general":
|
55 |
# query_engine = index.as_query_engine(similarity_top_k=3)
|
56 |
-
citation_engine = CitationQueryEngine.from_args(index, similarity_top_k=5)
|
57 |
-
|
|
|
|
|
|
|
58 |
else:
|
59 |
citation_engine = self.get_citation_engine(titles, index)
|
60 |
-
description = "A book containing information about medicine"
|
61 |
|
62 |
-
metadata = ToolMetadata(name="bot-belajar", description=description)
|
63 |
-
print(metadata)
|
64 |
|
65 |
-
vector_query_engine = QueryEngineTool(
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
)
|
68 |
-
|
69 |
|
70 |
# Initialize the OpenAI agent with the tools
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
if type_bot == "general":
|
73 |
-
system_prompt =
|
74 |
else:
|
75 |
additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
|
76 |
-
system_prompt =
|
|
|
77 |
chat_engine = OpenAIAgent.from_tools(
|
78 |
-
tools=[
|
79 |
llm=self.llm,
|
80 |
memory=self.chat_store.initialize_memory_bot(session_id),
|
81 |
system_prompt=system_prompt,
|
|
|
8 |
from llama_index.agent.openai import OpenAIAgent
|
9 |
from llama_index.llms.openai import OpenAI
|
10 |
from llama_index.core.query_engine import CitationQueryEngine
|
11 |
+
from llama_index.embeddings.openai import OpenAIEmbedding
|
12 |
+
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
|
13 |
from llama_index.core import Settings
|
14 |
from core.chat.chatstore import ChatStore
|
15 |
+
from core.multimodal import MultimodalQueryEngine
|
16 |
|
17 |
from config import GPTBOT_CONFIG
|
18 |
+
from core.prompt import SYSTEM_BOT_TEMPLATE, ADDITIONAL_INFORMATIONS,SYSTEM_BOT_GENERAL_TEMPLATE, SYSTEM_BOT_IMAGE_TEMPLATE
|
19 |
from core.parser import join_list
|
20 |
|
21 |
|
|
|
30 |
|
31 |
self.chat_store = ChatStore()
|
32 |
Settings.llm = self.llm
|
33 |
+
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
|
34 |
+
Settings.embed_model = embed_model
|
35 |
|
36 |
def get_citation_engine(self, titles:List, index):
|
37 |
+
model_multimodal = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
|
38 |
filters = [
|
39 |
MetadataFilter(
|
40 |
key="title",
|
|
|
47 |
filters = MetadataFilters(filters=filters, condition="or")
|
48 |
|
49 |
# Create the QueryEngineTool with the index and filters
|
50 |
+
kwargs = {"similarity_top_k": 10, "filters": filters}
|
51 |
|
52 |
retriever = index.as_retriever(**kwargs)
|
53 |
|
54 |
# citation_engine = CitationQueryEngine(retriever=retriever)
|
55 |
|
56 |
+
# return CitationQueryEngine.from_args(index, retriever=retriever)
|
57 |
+
return MultimodalQueryEngine(retriever=retriever, multi_modal_llm=model_multimodal)
|
58 |
+
|
59 |
|
60 |
def get_chat_engine(self, session_id, index, titles=None, type_bot="general"):
|
61 |
# Create the QueryEngineTool based on the type
|
62 |
if type_bot == "general":
|
63 |
# query_engine = index.as_query_engine(similarity_top_k=3)
|
64 |
+
# citation_engine = CitationQueryEngine.from_args(index, similarity_top_k=5)
|
65 |
+
model_multimodal = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
|
66 |
+
retriever = index.as_retriever(similarity_top_k=10)
|
67 |
+
citation_engine = MultimodalQueryEngine(retriever=retriever, multi_modal_llm=model_multimodal)
|
68 |
+
# description = "A book containing information about medicine"
|
69 |
else:
|
70 |
citation_engine = self.get_citation_engine(titles, index)
|
71 |
+
# description = "A book containing information about medicine"
|
72 |
|
73 |
+
# metadata = ToolMetadata(name="bot-belajar", description=description)
|
|
|
74 |
|
75 |
+
# vector_query_engine = QueryEngineTool(
|
76 |
+
# query_engine=citation_engine, metadata=metadata
|
77 |
+
# )
|
78 |
+
|
79 |
+
vector_tool = QueryEngineTool.from_defaults(
|
80 |
+
query_engine=citation_engine,
|
81 |
+
name="vector_tool",
|
82 |
+
description=(
|
83 |
+
"Useful for retrieving specific context from the data from a book containing information about medicine"
|
84 |
+
),
|
85 |
)
|
86 |
+
|
87 |
|
88 |
# Initialize the OpenAI agent with the tools
|
89 |
|
90 |
+
# if type_bot == "general":
|
91 |
+
# system_prompt = SYSTEM_BOT_GENERAL_TEMPLATE
|
92 |
+
# else:
|
93 |
+
# additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
|
94 |
+
# system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information=additional_information)
|
95 |
+
# chat_engine = OpenAIAgent.from_tools(
|
96 |
+
# tools=[vector_query_engine],
|
97 |
+
# llm=self.llm,
|
98 |
+
# memory=self.chat_store.initialize_memory_bot(session_id),
|
99 |
+
# system_prompt=system_prompt,
|
100 |
+
# )
|
101 |
+
|
102 |
if type_bot == "general":
|
103 |
+
system_prompt = SYSTEM_BOT_IMAGE_TEMPLATE
|
104 |
else:
|
105 |
additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
|
106 |
+
system_prompt = SYSTEM_BOT_IMAGE_TEMPLATE.format(additional_information=additional_information)
|
107 |
+
|
108 |
chat_engine = OpenAIAgent.from_tools(
|
109 |
+
tools=[vector_tool],
|
110 |
llm=self.llm,
|
111 |
memory=self.chat_store.initialize_memory_bot(session_id),
|
112 |
system_prompt=system_prompt,
|
core/chat/messaging.py
DELETED
@@ -1,63 +0,0 @@
|
|
1 |
-
# Experimental
|
2 |
-
|
3 |
-
from typing import Dict, Any, Optional, List
|
4 |
-
import asyncio
|
5 |
-
import logging
|
6 |
-
from uuid import uuid4
|
7 |
-
from anyio import ClosedResourceError
|
8 |
-
from anyio.streams.memory import MemoryObjectSendStream
|
9 |
-
|
10 |
-
from llama_index.core.callbacks.base import BaseCallbackHandler, CallbackManager
|
11 |
-
from llama_index.core.callbacks import CBEventType, EventPayload
|
12 |
-
from llama_index.core.query_engine.sub_question_query_engine import (
|
13 |
-
SubQuestionAnswerPair,
|
14 |
-
)
|
15 |
-
from llama_index.core.chat_engine.types import StreamingAgentChatResponse
|
16 |
-
from pydantic import BaseModel
|
17 |
-
|
18 |
-
from core.chat import schema
|
19 |
-
|
20 |
-
from db.db import MessageSubProcessSourceEnum
|
21 |
-
from core.chat.schema import SubProcessMetadataKeysEnum, SubProcessMetadataMap
|
22 |
-
from core.chat.engine import Engine
|
23 |
-
from script.vector_db import IndexManager
|
24 |
-
from service.dto import UserPromptRequest
|
25 |
-
|
26 |
-
logger = logging.getLogger(__name__)
|
27 |
-
|
28 |
-
class StreamedMessage(BaseModel):
|
29 |
-
content: str
|
30 |
-
|
31 |
-
async def handle_chat_message(
|
32 |
-
user_message: str,
|
33 |
-
send_chan: MemoryObjectSendStream,
|
34 |
-
) -> None:
|
35 |
-
async with send_chan:
|
36 |
-
engine = Engine()
|
37 |
-
|
38 |
-
index_manager = IndexManager()
|
39 |
-
index = index_manager.load_existing_indexes()
|
40 |
-
|
41 |
-
# Retrieve the chat engine with the loaded index
|
42 |
-
chat_engine = await engine.get_chat_engine(index)
|
43 |
-
|
44 |
-
logger.debug("Engine received")
|
45 |
-
streaming_chat_response: StreamingAgentChatResponse = (
|
46 |
-
await chat_engine.astream_chat(user_message)
|
47 |
-
)
|
48 |
-
response_str = ""
|
49 |
-
async for text in streaming_chat_response.async_response_gen():
|
50 |
-
response_str += text
|
51 |
-
if send_chan._closed:
|
52 |
-
logger.debug(
|
53 |
-
"Received streamed token after send channel closed. Ignoring."
|
54 |
-
)
|
55 |
-
return
|
56 |
-
await send_chan.send(StreamedMessage(content=response_str))
|
57 |
-
|
58 |
-
if response_str.strip() == "":
|
59 |
-
await send_chan.send(
|
60 |
-
StreamedMessage(
|
61 |
-
content="Sorry, I either wasn't able to understand your question or I don't have an answer for it."
|
62 |
-
)
|
63 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/chat/schema.py
DELETED
@@ -1,162 +0,0 @@
|
|
1 |
-
# Experimental
|
2 |
-
|
3 |
-
from pydantic import BaseModel, Field, field_validator
|
4 |
-
from typing import List, Optional, Dict, Union, Any
|
5 |
-
from enum import Enum
|
6 |
-
from uuid import UUID
|
7 |
-
from datetime import datetime
|
8 |
-
from llama_index.core.schema import BaseNode, NodeWithScore
|
9 |
-
from llama_index.core.callbacks.schema import EventPayload
|
10 |
-
from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair
|
11 |
-
from db.db import (
|
12 |
-
MessageRoleEnum,
|
13 |
-
MessageStatusEnum,
|
14 |
-
MessageSubProcessSourceEnum,
|
15 |
-
MessageSubProcessStatusEnum,
|
16 |
-
)
|
17 |
-
|
18 |
-
DB_DOC_ID_KEY = "db_document_id"
|
19 |
-
|
20 |
-
class Base(BaseModel):
|
21 |
-
id: Optional[UUID] = Field(None, description="Unique identifier")
|
22 |
-
created_at: Optional[datetime] = Field(None, description="Creation datetime")
|
23 |
-
updated_at: Optional[datetime] = Field(None, description="Update datetime")
|
24 |
-
|
25 |
-
class Config:
|
26 |
-
orm_mode = True
|
27 |
-
|
28 |
-
class BaseMetadataObject(BaseModel):
|
29 |
-
class Config:
|
30 |
-
orm_mode = True
|
31 |
-
|
32 |
-
class Citation(BaseMetadataObject):
|
33 |
-
document_id: UUID
|
34 |
-
text: str
|
35 |
-
page_number: int
|
36 |
-
score: Optional[float]
|
37 |
-
|
38 |
-
@field_validator("document_id")
|
39 |
-
def validate_document_id(cls, value):
|
40 |
-
if value:
|
41 |
-
return str(value)
|
42 |
-
return value
|
43 |
-
|
44 |
-
@classmethod
|
45 |
-
def from_node(cls, node_w_score: NodeWithScore) -> "Citation":
|
46 |
-
node: BaseNode = node_w_score.node
|
47 |
-
page_number = int(node.source_node.metadata["page_label"])
|
48 |
-
document_id = node.source_node.metadata[""]
|
49 |
-
return cls(
|
50 |
-
document_id=document_id,
|
51 |
-
text=node.get_content(),
|
52 |
-
page_number=page_number,
|
53 |
-
score=node_w_score.score,
|
54 |
-
)
|
55 |
-
|
56 |
-
|
57 |
-
class QuestionAnswerPair(BaseMetadataObject):
|
58 |
-
"""
|
59 |
-
A question-answer pair that is used to store the sub-questions and answers
|
60 |
-
"""
|
61 |
-
|
62 |
-
question: str
|
63 |
-
answer: Optional[str]
|
64 |
-
citations: Optional[List[Citation]] = None
|
65 |
-
|
66 |
-
@classmethod
|
67 |
-
def from_sub_question_answer_pair(
|
68 |
-
cls, sub_question_answer_pair: SubQuestionAnswerPair
|
69 |
-
):
|
70 |
-
if sub_question_answer_pair.sources is None:
|
71 |
-
citations = None
|
72 |
-
else:
|
73 |
-
citations = [
|
74 |
-
Citation.from_node(node_w_score)
|
75 |
-
for node_w_score in sub_question_answer_pair.sources
|
76 |
-
if node_w_score.node.source_node is not None
|
77 |
-
and DB_DOC_ID_KEY in node_w_score.node.source_node.metadata
|
78 |
-
]
|
79 |
-
citations = citations or None
|
80 |
-
return cls(
|
81 |
-
question=sub_question_answer_pair.sub_q.sub_question,
|
82 |
-
answer=sub_question_answer_pair.answer,
|
83 |
-
citations=citations,
|
84 |
-
)
|
85 |
-
|
86 |
-
|
87 |
-
# later will be Union[QuestionAnswerPair, more to add later... ]
|
88 |
-
class SubProcessMetadataKeysEnum(str, Enum):
|
89 |
-
SUB_QUESTION = EventPayload.SUB_QUESTION.value
|
90 |
-
|
91 |
-
|
92 |
-
# keeping the typing pretty loose here, in case there are changes to the metadata data formats.
|
93 |
-
SubProcessMetadataMap = Dict[Union[SubProcessMetadataKeysEnum, str], Any]
|
94 |
-
|
95 |
-
|
96 |
-
class MessageSubProcess(Base):
|
97 |
-
message_id: UUID
|
98 |
-
source: MessageSubProcessSourceEnum
|
99 |
-
status: MessageSubProcessStatusEnum
|
100 |
-
metadata_map: Optional[SubProcessMetadataMap]
|
101 |
-
|
102 |
-
|
103 |
-
class Message(Base):
|
104 |
-
conversation_id: UUID
|
105 |
-
content: str
|
106 |
-
role: MessageRoleEnum
|
107 |
-
status: MessageStatusEnum
|
108 |
-
sub_processes: List[MessageSubProcess]
|
109 |
-
|
110 |
-
|
111 |
-
class UserMessageCreate(BaseModel):
|
112 |
-
content: str
|
113 |
-
|
114 |
-
class DocumentMetadataKeysEnum(str, Enum):
|
115 |
-
"""
|
116 |
-
Enum for the keys of the metadata map for a document
|
117 |
-
"""
|
118 |
-
|
119 |
-
SEC_DOCUMENT = "sec_document"
|
120 |
-
|
121 |
-
|
122 |
-
class SecDocumentTypeEnum(str, Enum):
|
123 |
-
"""
|
124 |
-
Enum for the type of sec document
|
125 |
-
"""
|
126 |
-
|
127 |
-
TEN_K = "10-K"
|
128 |
-
TEN_Q = "10-Q"
|
129 |
-
|
130 |
-
|
131 |
-
class SecDocumentMetadata(BaseModel):
|
132 |
-
"""
|
133 |
-
Metadata for a document that is a sec document
|
134 |
-
"""
|
135 |
-
|
136 |
-
company_name: str
|
137 |
-
company_ticker: str
|
138 |
-
doc_type: SecDocumentTypeEnum
|
139 |
-
year: int
|
140 |
-
quarter: Optional[int]
|
141 |
-
accession_number: Optional[str]
|
142 |
-
cik: Optional[str]
|
143 |
-
period_of_report_date: Optional[datetime]
|
144 |
-
filed_as_of_date: Optional[datetime]
|
145 |
-
date_as_of_change: Optional[datetime]
|
146 |
-
|
147 |
-
|
148 |
-
DocumentMetadataMap = Dict[Union[DocumentMetadataKeysEnum, str], Any]
|
149 |
-
|
150 |
-
|
151 |
-
class Document(Base):
|
152 |
-
url: str
|
153 |
-
metadata_map: Optional[DocumentMetadataMap] = None
|
154 |
-
|
155 |
-
|
156 |
-
class Conversation(Base):
|
157 |
-
messages: List[Message]
|
158 |
-
documents: List[Document]
|
159 |
-
|
160 |
-
|
161 |
-
class ConversationCreate(BaseModel):
|
162 |
-
document_ids: List[UUID]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/module_creator/__init__.py
DELETED
File without changes
|
core/multimodal.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llama_index.core.query_engine import CustomQueryEngine
|
2 |
+
from llama_index.core.retrievers import BaseRetriever
|
3 |
+
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
|
4 |
+
from llama_index.core.schema import ImageNode, NodeWithScore, MetadataMode
|
5 |
+
from llama_index.core.prompts import PromptTemplate
|
6 |
+
from llama_index.core.base.response.schema import Response
|
7 |
+
from typing import Optional
|
8 |
+
from core.prompt import MULTOMODAL_QUERY_TEMPLATE
|
9 |
+
|
10 |
+
|
11 |
+
gpt_4o = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
|
12 |
+
|
13 |
+
|
14 |
+
QA_PROMPT = PromptTemplate(MULTOMODAL_QUERY_TEMPLATE)
|
15 |
+
|
16 |
+
|
17 |
+
class MultimodalQueryEngine(CustomQueryEngine):
    """Custom multimodal query engine.

    Retrieves nodes for a query with ``retriever``, builds ``ImageNode``
    items from the ``image_link`` metadata of those nodes, formats the
    retrieved text into ``qa_prompt`` and asks ``multi_modal_llm`` to answer
    from that prompt together with the images.
    """

    qa_prompt: PromptTemplate  # formatted with ``context_str`` and ``query_str``
    retriever: BaseRetriever  # source of the nodes for each query
    multi_modal_llm: OpenAIMultiModal  # completes the prompt together with the images

    def __init__(self, qa_prompt: Optional[PromptTemplate] = None, **kwargs) -> None:
        """Initialize the engine.

        Args:
            qa_prompt: Prompt template to use; the module-level ``QA_PROMPT``
                is used when this is ``None``.
            **kwargs: Remaining fields (``retriever``, ``multi_modal_llm``),
                forwarded to the base class initializer.
        """
        super().__init__(qa_prompt=qa_prompt or QA_PROMPT, **kwargs)

    def custom_query(self, query_str: str):
        """Answer ``query_str`` from the retrieved text and linked images.

        Args:
            query_str: The user query passed to the retriever and the prompt.

        Returns:
            A ``Response`` whose text is the model completion, whose
            ``source_nodes`` are the retrieved nodes, and whose metadata
            holds both the retrieved nodes (``text_nodes``) and the image
            nodes built from their links (``image_nodes``).
        """
        # Local import: this block cannot edit the file's import section.
        import logging

        nodes = self.retriever.retrieve(query_str)

        # Build one ImageNode per usable link. ``image_link`` may hold a
        # single link or a list of links; missing, None and empty values are
        # skipped so no ImageNode is created without a URL.
        image_nodes = []
        for retrieved in nodes:
            links = retrieved.metadata.get("image_link")
            if not links:
                continue
            if not isinstance(links, list):
                links = [links]
            image_nodes.extend(
                NodeWithScore(node=ImageNode(image_url=link))
                for link in links
                if link
            )

        logging.getLogger(__name__).debug("image_nodes: %s", image_nodes)

        # Create the context string from the retrieved nodes and put it in the prompt.
        context_str = "\n\n".join(
            [r.get_content(metadata_mode=MetadataMode.LLM) for r in nodes]
        )
        fmt_prompt = self.qa_prompt.format(context_str=context_str, query_str=query_str)

        # Synthesize an answer from the formatted text and the images.
        llm_response = self.multi_modal_llm.complete(
            prompt=fmt_prompt,
            image_documents=[image_node.node for image_node in image_nodes],
        )
        return Response(
            response=str(llm_response),
            source_nodes=nodes,
            metadata={"text_nodes": nodes, "image_nodes": image_nodes},
        )
|
core/parser.py
CHANGED
@@ -64,7 +64,6 @@ def update_response(text):
|
|
64 |
|
65 |
return text
|
66 |
|
67 |
-
|
68 |
def renumber_sources(source_list):
|
69 |
new_sources = []
|
70 |
for i, source in enumerate(source_list):
|
@@ -74,7 +73,6 @@ def renumber_sources(source_list):
|
|
74 |
new_sources.append(f"source {i+1}: {content}")
|
75 |
return new_sources
|
76 |
|
77 |
-
|
78 |
def sort_and_renumber_sources(source_list):
|
79 |
"""
|
80 |
This function takes a list of sources, sorts them based on the source number,
|
@@ -98,6 +96,7 @@ def sort_and_renumber_sources(source_list):
|
|
98 |
|
99 |
return sorted_sources
|
100 |
|
|
|
101 |
def seperate_to_list(text):
|
102 |
# Step 1: Split the text by line breaks (\n)
|
103 |
lines = text.split("\n")
|
@@ -123,7 +122,7 @@ def join_list(items):
|
|
123 |
return f"{items[0]} and {items[1]}"
|
124 |
else:
|
125 |
return ", ".join(items[:-1]) + " and " + items[-1]
|
126 |
-
|
127 |
def redesign_structure_message(message, metadata):
|
128 |
"""
|
129 |
This function replaces occurrences of '[n]' in the message
|
@@ -143,4 +142,31 @@ def redesign_structure_message(message, metadata):
|
|
143 |
# Use regex to find all citations in the format '[n]'
|
144 |
redesigned_message = re.sub(r'\[(\d+)\]', replace_citation, message)
|
145 |
|
146 |
-
return redesigned_message
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
return text
|
66 |
|
|
|
67 |
def renumber_sources(source_list):
|
68 |
new_sources = []
|
69 |
for i, source in enumerate(source_list):
|
|
|
73 |
new_sources.append(f"source {i+1}: {content}")
|
74 |
return new_sources
|
75 |
|
|
|
76 |
def sort_and_renumber_sources(source_list):
|
77 |
"""
|
78 |
This function takes a list of sources, sorts them based on the source number,
|
|
|
96 |
|
97 |
return sorted_sources
|
98 |
|
99 |
+
|
100 |
def seperate_to_list(text):
|
101 |
# Step 1: Split the text by line breaks (\n)
|
102 |
lines = text.split("\n")
|
|
|
122 |
return f"{items[0]} and {items[1]}"
|
123 |
else:
|
124 |
return ", ".join(items[:-1]) + " and " + items[-1]
|
125 |
+
|
126 |
def redesign_structure_message(message, metadata):
|
127 |
"""
|
128 |
This function replaces occurrences of '[n]' in the message
|
|
|
142 |
# Use regex to find all citations in the format '[n]'
|
143 |
redesigned_message = re.sub(r'\[(\d+)\]', replace_citation, message)
|
144 |
|
145 |
+
return redesigned_message
|
146 |
+
|
147 |
+
def extract_sorted_page_numbers(content):
    """Return the distinct page numbers cited as ``[p-N]`` in *content*.

    Every reference such as ``[p-166]`` is found, its number converted to
    ``int``, duplicates dropped, and the result returned in ascending order.
    An empty list is returned when there are no references.
    """
    unique_pages = {
        int(match.group(1)) for match in re.finditer(r'\[p-(\d+)\]', content)
    }
    return sorted(unique_pages)
|
156 |
+
|
157 |
+
# Method to filter and create a new list with the relevant page numbers [163, 165, 166]
|
158 |
+
def filter_metadata_by_pages(metadata, pages):
    """Collapse a metadata list into one entry carrying the given pages.

    When both *pages* and *metadata* are non-empty, a single-element list
    is returned. Its dict holds *pages* under ``page_number`` plus the
    ``title``, ``author``, ``category``, ``year``, ``publisher`` and
    ``reference`` values copied from the first metadata entry. Otherwise
    an empty list is returned.
    """
    if not (pages and metadata):
        return []

    # Only the first entry is read for the shared bibliographic fields.
    first_entry = metadata[0]
    shared_fields = ("title", "author", "category", "year", "publisher", "reference")

    combined = {"page_number": pages}
    for field_name in shared_fields:
        combined[field_name] = first_entry[field_name]
    return [combined]
|
core/prompt.py
CHANGED
@@ -1,5 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
SYSTEM_BOT_TEMPLATE = """
|
2 |
-
Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu hanya memberikan informasi dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi dari apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli.
|
3 |
|
4 |
**Instruksi**:
|
5 |
|
@@ -14,6 +47,121 @@ Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis
|
|
14 |
5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
|
15 |
"""
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
ADDITIONAL_INFORMATIONS = """
|
18 |
Kemudian, kamu menjawab pertanyan user dari buku {titles}, jadi jika user bertaya kamu pastikan akan mengacu buku tersebut yang didapatkan dari tools dari yang kamu punya.
|
19 |
"""
|
@@ -49,6 +197,7 @@ Kamu juga harus memperhatikan instruksi :
|
|
49 |
- "Dapatkan buku ini sekarang dan tingkatkan pemahaman Anda tentang kesehatan 😊"
|
50 |
"""
|
51 |
|
|
|
52 |
SYSTEM_TOPIC_TEMPLATE = """
|
53 |
You are tasked with analyzing a table of contents from a book. Your goal is to identify and extract the main topics and subtopics. Please provide a clear and organized list of these topics and subtopics. The list should reflect the structure and hierarchy presented in the table of contents.
|
54 |
"""
|
@@ -147,7 +296,6 @@ Your task is to extract and organize metadata for the {class_name}. Follow the i
|
|
147 |
- **How:** Structure the entries clearly and precisely as attributes of the class.
|
148 |
- **Tip:** Use precise language to capture the relationship between the main topic and subtopic, ensuring clarity and ease of reference for future use.
|
149 |
"""
|
150 |
-
|
151 |
SUMMARIZER_SYSTEM_TEMPLATE = """
|
152 |
|
153 |
"""
|
|
|
1 |
+
PARSER_INSTRUCTION = """
|
2 |
+
You are a highly proficient language model designed to convert pages from PDF, PPT and other files into structured markdown text. Your goal is to accurately transcribe text and identify and describe images, particularly graphs and other graphical elements.
|
3 |
+
|
4 |
+
You have been tasked with creating a markdown copy of each page from the provided PDF or PPT image. You should write the number of the figure, and keep it in your markdown text. Each image description must include a full description of the content, a summary of the graphical object.
|
5 |
+
|
6 |
+
Maintain the sequence of all the elements.
|
7 |
+
|
8 |
+
For the following element, follow the requirement of extraction:
|
9 |
+
for Text:
|
10 |
+
- Extract all readable text from the page.
|
11 |
+
- Exclude any diagonal text, headers, and footers.
|
12 |
+
|
13 |
+
for Text which includes hyperlink:
|
14 |
+
-Extract hyperlink and present it with the text
|
15 |
+
|
16 |
+
for Image Identification and Description:
|
17 |
+
- Identify all images, graphs, and other graphical elements on the page.
|
18 |
+
- For each image or graph, note the figure number and include it in the description as "Figure X" where X is the figure number.
|
19 |
+
- If the image has graph , extract the graph as image . DO NOT convert it into a table or extract the wording inside the graph.
|
20 |
+
- If image contains wording that is hard to extract , flag it with <unidentifiable section> instead of parsing.
|
21 |
+
- If the image has a subtitle or caption, include it in the description.
|
22 |
+
- If the image has an organisation chart, convert it into a hierarchical, understandable format.
|
23 |
+
- If the image contain process flow , capture it as a whole image instead of separate into blocks of images.
|
24 |
+
|
25 |
+
for Table:
|
26 |
+
- Try to retain the columns and structure of the table and extract it into markdown format.
|
27 |
+
|
28 |
+
# OUTPUT INSTRUCTIONS
|
29 |
+
|
30 |
+
- Exclude any diagonal text, headers, and footers from the output.
|
31 |
+
- For each image and graph, provide a detailed description,caption if there's any and summary. Clearly denote the figure number for each image in the format "Figure X" if it is noticed in the context.
|
32 |
+
"""
|
33 |
+
|
34 |
SYSTEM_BOT_TEMPLATE = """
|
35 |
+
Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu hanya memberikan informasi dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. Jika bertanya tentang rangkuman, cukup rangkum apa yang kamu tahu pada konteks yang kamu miliki. Jika bertanya tentang daftar isi, pastikan kamu melihatnya dari teks pendahuluan atau content tablenya. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi dari apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli.
|
36 |
|
37 |
**Instruksi**:
|
38 |
|
|
|
47 |
5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
|
48 |
"""
|
49 |
|
50 |
+
SYSTEM_BOT_IMAGE_TEMPLATE = """
|
51 |
+
Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu menggunakan tool, jangan mencoba coba untuk menerka jawaban dari pengetahuanmu kemudian pastikan kamu hanya memberikan informasi dari tool dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi secara singkat apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli. Selalu cantumkan citation halamannya dari konteks yang kamu ambil berdasarkan format yang ada. Yaitu menggunakan [p-no.halaman] di akhir kutipan yang kamu ambil.
|
52 |
+
|
53 |
+
**Instruksi**:
|
54 |
+
|
55 |
+
1. **Jawaban Berdasarkan Tools**: Jika pengguna bertanya tentang topik kedokteran, gunakanlah tools yang tersedia untuk memberikan jawaban. Pastikan jawabanmu relevan dan sesuai dengan informasi dari tools tersebut. Jelaskan informasi dengan jelas dan lengkap. Jika ada tabel, boleh anda tampilkan tabel nya untuk menyampaikan data data yang jelas berdasarkan konteks buku.
|
56 |
+
|
57 |
+
2. **Referensi dan Kutipan**:
|
58 |
+
- Jika di konteks terdapat gambar, maka anda perlu menampilkan gambar tersebut dengan mencantumkan link gambarnya dalam format markdown, serta menyebutkan di halaman berapa gambar tersebut diambil. Contoh :
|
59 |
+
* Based on the images: <br>
|
60 |
+
![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/test/img_p9_1.png) --> kamu tetap harus menampilkan tampilan gambar yang di markdown sehingga menjadi :
|
61 |
+
|
62 |
+
Pada gambar :\n
|
63 |
+
![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/test/img_p9_1.png)
|
64 |
+
\nsumber : [p-10]
|
65 |
+
|
66 |
+
Jika memang tidak ada gambar pada konteks yang dibutuhkan oleh pertanyaan, anda cukup tambahkan bahwa anda tidak memiliki gambar pada konteks pertanyaan. Pastikan gambarnya sesuai dengan apa yang ditanyakan, tidak membuat gambar yang tidak sesuai dengan konteks.
|
67 |
+
|
68 |
+
- Jangan menghapus sumber kutipan baik berupa citasi atau halaman (page number) dari teks yang diberikan. Contohnya, jika teksnya adalah "Ilmu kedokteran sangat dibutuhkan [p-2]", pastikan untuk menyertakan kutipan sumbernya yaitu [p-2] dalam jawabanmu. Contoh lain: :
|
69 |
+
|
70 |
+
* Water is wet when the sky is red [p-11]. --> kamu harus tetap cantumkan [p-11] pada jawaban yang kamu generate
|
71 |
+
|
72 |
+
* source :
|
73 |
+
page_number : 12
|
74 |
+
The sky is red in the evening and blue in the morning. --> Kamu harus mengubahnya menjadi sesuai dengan page numbernya atau citationnya sehingga menjadi :
|
75 |
+
The sky is red in the evening and blue in the morning [p-12].
|
76 |
+
|
77 |
+
3. **Ketika Tidak Tahu Jawaban**: Jika pertanyaan pengguna tidak dapat dijawab dengan menggunakan tools ini, sampaikan dengan sopan bahwa kamu tidak memiliki jawaban untuk pertanyaan tersebut. Arahkan pengguna untuk mencari informasi lebih lanjut atau bertanya pada ahli di bidang kedokteran.
|
78 |
+
|
79 |
+
4. **Gaya Jawaban**: Berikan jawaban dengan gaya yang ramah dan profesional. Sampaikan informasi secara naratif agar lebih mudah dipahami. Boleh menggunakan point point dan uraiannya agar bisa menjelaskan informasi yang kompleks sehingga mudah dipahami. Gunakan kata 'dok' atau 'dokter' untuk merujuk pada dokter, dan hindari kesan monoton dengan menambahkan emotikon jika sesuai seperti 😁, 😊, 🙌, 😉, 😀, 🤔, 😇.
|
80 |
+
|
81 |
+
5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
|
82 |
+
"""
|
83 |
+
|
84 |
+
MULTOMODAL_QUERY_TEMPLATE = """\
|
85 |
+
Below is parsed text from books, available in two formats: 'markdown' (which organizes relevant diagrams as tables) and 'raw text' (preserving the rough spatial layout of the original text). Additionally, image references from the book are provided.
|
86 |
+
|
87 |
+
### Instructions:
|
88 |
+
1. **Use image information as the primary source**: Reference the **image URL** to explain your answer, if possible.
|
89 |
+
2. **Only use parsed text** (markdown or raw) **if the image does not provide a clear answer**.
|
90 |
+
3. **Always cite the page number** for any information referenced. Please give the page number after the text that you cited, the format is : [p-no.page]
|
91 |
+
4. **Provide the image inline in the answer** by linking directly to the AWS S3 image URL provided for easy viewing.
|
92 |
+
5. Ensure that the AWS link represented by [title] matches the book's title and that the link or URL is provided in the context. Never include a link or URL that is not present in the context. If no link is available, simply state: "Apologies, the image or content you are referring to is not available in this context."
|
93 |
+
|
94 |
+
### Example:
|
95 |
+
**Sources Provided:**
|
96 |
+
|
97 |
+
**Source 1:**
|
98 |
+
- Page number: 10
|
99 |
+
- Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/[title]/img_p9_1.png`
|
100 |
+
- contoh judul : blue sky, so that the link should be :
|
101 |
+
- Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p9_1.png`
|
102 |
+
- Text: "The sky is red in the evening and blue in the morning. [p-10]"
|
103 |
+
|
104 |
+
**Source 2:**
|
105 |
+
- Page number: 11
|
106 |
+
- Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/[title]/img_p10_1.png`
|
107 |
+
- contoh judul : blue sky, so that the link should be :
|
108 |
+
- Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p10_1.png`
|
109 |
+
- Text: "Water is wet when the sky is red. [p-11]"
|
110 |
+
|
111 |
+
**Query:** When is water wet?
|
112 |
+
|
113 |
+
**Answer:**
|
114 |
+
Based on the images:\n
|
115 |
+
![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p9_1.png)\n
|
116 |
+
[p-10]
|
117 |
+
and \n
|
118 |
+
![figure-11](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p10_1.png)\n,
|
119 |
+
[p-11]\n
|
120 |
+
water is wet when the sky is red in the evening [p-10, p-11].
|
121 |
+
|
122 |
+
**Sources Provided:**
|
123 |
+
Source 1:
|
124 |
+
|
125 |
+
Page number: 15
|
126 |
+
Image URL: https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p14_1.png
|
127 |
+
Text: "Plants grow best in blue light but struggle in red light."
|
128 |
+
Source 2:
|
129 |
+
|
130 |
+
Page number: 16
|
131 |
+
Image URL: https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p15_1.png
|
132 |
+
Text: "Optimal light conditions for plant growth are illustrated in Figure 16."
|
133 |
+
|
134 |
+
Query:
|
135 |
+
What color of light is best for plant growth?
|
136 |
+
|
137 |
+
Answer:
|
138 |
+
When we look in the image :\n
|
139 |
+
![figure-15](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p14_1.png)
|
140 |
+
\n[p-15]
|
141 |
+
|
142 |
+
|
143 |
+
Plants grow best under blue light, as shown in the color-coded illustration in the image [p-15].
|
144 |
+
|
145 |
+
And the optimal light conditions are shown in the figure : \n
|
146 |
+
|
147 |
+
![figure-16](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p15_1.png)
|
148 |
+
\n[p-16]
|
149 |
+
---
|
150 |
+
|
151 |
+
**Now, please answer the following query based on the sources provided:**
|
152 |
+
|
153 |
+
---
|
154 |
+
|
155 |
+
**Sources:**
|
156 |
+
{context_str}
|
157 |
+
|
158 |
+
**Query:**
|
159 |
+
{query_str}
|
160 |
+
|
161 |
+
**Answer:**
|
162 |
+
|
163 |
+
"""
|
164 |
+
|
165 |
ADDITIONAL_INFORMATIONS = """
|
166 |
Kemudian, kamu menjawab pertanyaan user dari buku {titles}, jadi jika user bertanya, pastikan kamu mengacu pada buku tersebut yang didapatkan dari tools yang kamu punya.
|
167 |
"""
|
|
|
197 |
- "Dapatkan buku ini sekarang dan tingkatkan pemahaman Anda tentang kesehatan 😊"
|
198 |
"""
|
199 |
|
200 |
+
|
201 |
SYSTEM_TOPIC_TEMPLATE = """
|
202 |
You are tasked with analyzing a table of contents from a book. Your goal is to identify and extract the main topics and subtopics. Please provide a clear and organized list of these topics and subtopics. The list should reflect the structure and hierarchy presented in the table of contents.
|
203 |
"""
|
|
|
296 |
- **How:** Structure the entries clearly and precisely as attributes of the class.
|
297 |
- **Tip:** Use precise language to capture the relationship between the main topic and subtopic, ensuring clarity and ease of reference for future use.
|
298 |
"""
|
|
|
299 |
SUMMARIZER_SYSTEM_TEMPLATE = """
|
300 |
|
301 |
"""
|
db/database.py
CHANGED
@@ -13,9 +13,6 @@ load_dotenv()
|
|
13 |
|
14 |
SQLALCHEMY_DATABASE_URL = MYSQL_CONFIG.DB_URI_SQL_ALCHEMY
|
15 |
|
16 |
-
# Get the base64 encoded certificate from the environment variable
|
17 |
-
ca_cert_base64 = os.getenv("CA_CERT_BASE64")
|
18 |
-
|
19 |
# Retrieve the Base64-encoded CA certificate from the environment variable
|
20 |
ca_cert_base64 = os.getenv("CA_CERT_BASE64")
|
21 |
|
|
|
13 |
|
14 |
SQLALCHEMY_DATABASE_URL = MYSQL_CONFIG.DB_URI_SQL_ALCHEMY
|
15 |
|
|
|
|
|
|
|
16 |
# Retrieve the Base64-encoded CA certificate from the environment variable
|
17 |
ca_cert_base64 = os.getenv("CA_CERT_BASE64")
|
18 |
|
db/db.py
DELETED
@@ -1,124 +0,0 @@
|
|
1 |
-
# Experimental
|
2 |
-
|
3 |
-
from sqlalchemy import Column, String, Enum, ForeignKey, DateTime
|
4 |
-
from sqlalchemy.dialects.postgresql import UUID, ENUM, JSONB
|
5 |
-
from sqlalchemy.orm import relationship
|
6 |
-
from sqlalchemy.sql import func
|
7 |
-
from enum import Enum
|
8 |
-
from sqlalchemy.ext.declarative import as_declarative, declared_attr
|
9 |
-
from llama_index.core.callbacks.schema import CBEventType
|
10 |
-
|
11 |
-
|
12 |
-
# Model
|
13 |
-
@as_declarative()
|
14 |
-
class Base:
|
15 |
-
id = Column(UUID, primary_key=True, index=True, default=func.uuid_generate_v4())
|
16 |
-
created_at = Column(DateTime, server_default=func.now(), nullable=False)
|
17 |
-
updated_at = Column(
|
18 |
-
DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
|
19 |
-
)
|
20 |
-
|
21 |
-
__name__: str
|
22 |
-
|
23 |
-
# Generate __tablename__ automatically
|
24 |
-
@declared_attr
|
25 |
-
def __tablename__(cls) -> str:
|
26 |
-
return cls.__name__.lower()
|
27 |
-
|
28 |
-
# DB
|
29 |
-
class MessageRoleEnum(str, Enum):
|
30 |
-
user = "user"
|
31 |
-
assistant = "assistant"
|
32 |
-
|
33 |
-
|
34 |
-
class MessageStatusEnum(str, Enum):
|
35 |
-
PENDING = "PENDING"
|
36 |
-
SUCCESS = "SUCCESS"
|
37 |
-
ERROR = "ERROR"
|
38 |
-
|
39 |
-
|
40 |
-
class MessageSubProcessStatusEnum(str, Enum):
|
41 |
-
PENDING = "PENDING"
|
42 |
-
FINISHED = "FINISHED"
|
43 |
-
|
44 |
-
|
45 |
-
# python doesn't allow enums to be extended, so we have to do this
|
46 |
-
additional_message_subprocess_fields = {
|
47 |
-
"CONSTRUCTED_QUERY_ENGINE": "constructed_query_engine",
|
48 |
-
"SUB_QUESTIONS": "sub_questions",
|
49 |
-
}
|
50 |
-
MessageSubProcessSourceEnum = Enum(
|
51 |
-
"MessageSubProcessSourceEnum",
|
52 |
-
[(event_type.name, event_type.value) for event_type in CBEventType]
|
53 |
-
+ list(additional_message_subprocess_fields.items()),
|
54 |
-
)
|
55 |
-
|
56 |
-
|
57 |
-
def to_pg_enum(enum_class) -> ENUM:
|
58 |
-
return ENUM(enum_class, name=enum_class.__name__)
|
59 |
-
|
60 |
-
|
61 |
-
class Document(Base):
|
62 |
-
"""
|
63 |
-
A document along with its metadata
|
64 |
-
"""
|
65 |
-
|
66 |
-
# URL to the actual document (e.g. a PDF)
|
67 |
-
url = Column(String, nullable=False, unique=True)
|
68 |
-
metadata_map = Column(JSONB, nullable=True)
|
69 |
-
conversations = relationship("ConversationDocument", back_populates="document")
|
70 |
-
|
71 |
-
|
72 |
-
class Conversation(Base):
|
73 |
-
"""
|
74 |
-
A conversation with messages and linked documents
|
75 |
-
"""
|
76 |
-
|
77 |
-
messages = relationship("Message", back_populates="conversation")
|
78 |
-
conversation_documents = relationship(
|
79 |
-
"ConversationDocument", back_populates="conversation"
|
80 |
-
)
|
81 |
-
|
82 |
-
|
83 |
-
class ConversationDocument(Base):
|
84 |
-
"""
|
85 |
-
A many-to-many relationship between a conversation and a document
|
86 |
-
"""
|
87 |
-
|
88 |
-
conversation_id = Column(
|
89 |
-
UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
|
90 |
-
)
|
91 |
-
document_id = Column(UUID(as_uuid=True), ForeignKey("document.id"), index=True)
|
92 |
-
conversation = relationship("Conversation", back_populates="conversation_documents")
|
93 |
-
document = relationship("Document", back_populates="conversations")
|
94 |
-
|
95 |
-
|
96 |
-
class Message(Base):
|
97 |
-
"""
|
98 |
-
A message in a conversation
|
99 |
-
"""
|
100 |
-
|
101 |
-
conversation_id = Column(
|
102 |
-
UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
|
103 |
-
)
|
104 |
-
content = Column(String)
|
105 |
-
role = Column(to_pg_enum(MessageRoleEnum))
|
106 |
-
status = Column(to_pg_enum(MessageStatusEnum), default=MessageStatusEnum.PENDING)
|
107 |
-
conversation = relationship("Conversation", back_populates="messages")
|
108 |
-
sub_processes = relationship("MessageSubProcess", back_populates="message")
|
109 |
-
|
110 |
-
|
111 |
-
class MessageSubProcess(Base):
|
112 |
-
"""
|
113 |
-
A record of a sub-process that occurred as part of the generation of a message from an AI assistant
|
114 |
-
"""
|
115 |
-
|
116 |
-
message_id = Column(UUID(as_uuid=True), ForeignKey("message.id"), index=True)
|
117 |
-
source = Column(to_pg_enum(MessageSubProcessSourceEnum))
|
118 |
-
message = relationship("Message", back_populates="sub_processes")
|
119 |
-
status = Column(
|
120 |
-
to_pg_enum(MessageSubProcessStatusEnum),
|
121 |
-
default=MessageSubProcessStatusEnum.FINISHED,
|
122 |
-
nullable=False,
|
123 |
-
)
|
124 |
-
metadata_map = Column(JSONB, nullable=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
db/delete_data.py
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
import logging
|
2 |
-
from db.repository import Repository, get_db_conn
|
3 |
-
|
4 |
-
# Setup logging (configure as needed)
|
5 |
-
logging.basicConfig(level=logging.INFO)
|
6 |
-
|
7 |
-
|
8 |
-
class DeleteDatabase(Repository):
|
9 |
-
async def delete_record(self, params):
|
10 |
-
if "id" not in params:
|
11 |
-
raise ValueError("The 'id' parameter is required.")
|
12 |
-
query = """
|
13 |
-
DELETE FROM metadata
|
14 |
-
WHERE id = :id
|
15 |
-
"""
|
16 |
-
|
17 |
-
try:
|
18 |
-
await self._exec(query, params)
|
19 |
-
logging.info(f"Record with id {params['id']} deleted successfully.")
|
20 |
-
except Exception as e:
|
21 |
-
logging.error(f"Error deleting record with id {params['id']}: {e}")
|
22 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
db/fetching.py
CHANGED
@@ -13,7 +13,7 @@ class DataFetching:
|
|
13 |
user_meta_entries = user_meta_query.get_user_meta_entries(self.db)
|
14 |
|
15 |
if not user_meta_entries or user_meta_entries==[]:
|
16 |
-
return
|
17 |
|
18 |
# Extract relevant data from the user_meta_entries
|
19 |
|
@@ -31,8 +31,6 @@ class DataFetching:
|
|
31 |
for user_meta, metadata, category in user_meta_entries # Unpack the tuple
|
32 |
]
|
33 |
|
34 |
-
print("Hasil akhir ", results)
|
35 |
-
|
36 |
# Extract relevant data from the user_meta_entries
|
37 |
return results
|
38 |
|
@@ -42,7 +40,6 @@ class DataFetching:
|
|
42 |
|
43 |
return [
|
44 |
MetadataResponse(
|
45 |
-
status="success",
|
46 |
id = id,
|
47 |
title=title,
|
48 |
author=author,
|
|
|
13 |
user_meta_entries = user_meta_query.get_user_meta_entries(self.db)
|
14 |
|
15 |
if not user_meta_entries or user_meta_entries==[]:
|
16 |
+
return []
|
17 |
|
18 |
# Extract relevant data from the user_meta_entries
|
19 |
|
|
|
31 |
for user_meta, metadata, category in user_meta_entries # Unpack the tuple
|
32 |
]
|
33 |
|
|
|
|
|
34 |
# Extract relevant data from the user_meta_entries
|
35 |
return results
|
36 |
|
|
|
40 |
|
41 |
return [
|
42 |
MetadataResponse(
|
|
|
43 |
id = id,
|
44 |
title=title,
|
45 |
author=author,
|
db/get_data.py
DELETED
@@ -1,69 +0,0 @@
|
|
1 |
-
import logging
|
2 |
-
from db.repository import Repository, get_db_conn
|
3 |
-
from fastapi.responses import JSONResponse
|
4 |
-
|
5 |
-
# Setup logging (configure as needed)
|
6 |
-
logging.basicConfig(level=logging.INFO)
|
7 |
-
|
8 |
-
|
9 |
-
class GetDatabase(Repository):
|
10 |
-
async def execute_query(self, query, params=None, fetch_one=False):
|
11 |
-
"""
|
12 |
-
|
13 |
-
Helper function to execute SQL queries and handle exceptions.
|
14 |
-
"""
|
15 |
-
try:
|
16 |
-
print(fetch_one)
|
17 |
-
if fetch_one:
|
18 |
-
|
19 |
-
results = await self._fetch_one(query, params)
|
20 |
-
print(results)
|
21 |
-
else:
|
22 |
-
results = await self.get_by_query(query, params)
|
23 |
-
print("result execute query : ", results)
|
24 |
-
return results if results else None
|
25 |
-
except Exception as e:
|
26 |
-
logging.error(f"An error occurred while executing query: {e}")
|
27 |
-
return JSONResponse(status_code=500, content=f"An error occurred while executing query: {e}")
|
28 |
-
|
29 |
-
async def get_data(self, title):
|
30 |
-
"""
|
31 |
-
Fetch the first result matching the given title from the metadata table.
|
32 |
-
"""
|
33 |
-
query = """
|
34 |
-
SELECT * FROM metadata
|
35 |
-
WHERE title = %s
|
36 |
-
limit 5;
|
37 |
-
"""
|
38 |
-
|
39 |
-
try:
|
40 |
-
results = await self.execute_query(query, (title,), fetch_one=True)
|
41 |
-
return results
|
42 |
-
except Exception as e:
|
43 |
-
logging.error(f"An error occurred while get data: {e}")
|
44 |
-
return JSONResponse(status_code=500, content=f"An error occurred while get data: {e}")
|
45 |
-
|
46 |
-
async def get_all_data(self):
|
47 |
-
"""
|
48 |
-
Fetch all data from the metadata table.
|
49 |
-
"""
|
50 |
-
query = """
|
51 |
-
SELECT * FROM metadata
|
52 |
-
"""
|
53 |
-
results = await self.execute_query(query)
|
54 |
-
print("result", results)
|
55 |
-
return results
|
56 |
-
|
57 |
-
async def get_data_by_id(self, id):
|
58 |
-
query = f"""
|
59 |
-
SELECT * FROM metadata WHERE id = :id
|
60 |
-
"""
|
61 |
-
|
62 |
-
param = {"id" : id}
|
63 |
-
try:
|
64 |
-
results = await self.execute_query(query, param)
|
65 |
-
print('Query successful, results: %s', results)
|
66 |
-
return results[0] if results else None
|
67 |
-
except Exception as e:
|
68 |
-
print('Error fetching data by ID %s: %s', id, e)
|
69 |
-
return JSONResponse(status_code=500, content=f"An error while fething data: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
db/query/base_query.py
CHANGED
@@ -53,9 +53,8 @@ class BaseQuery:
|
|
53 |
"""Delete an entry by ID with optional filter conditions."""
|
54 |
# Build the query to select the entry
|
55 |
query = select(model)
|
56 |
-
if id
|
57 |
query = query.where(model.id == id)
|
58 |
-
|
59 |
if filter_conditions:
|
60 |
query = query.where(*filter_conditions)
|
61 |
|
@@ -65,9 +64,7 @@ class BaseQuery:
|
|
65 |
return entry
|
66 |
|
67 |
# Build the delete query
|
68 |
-
delete_query = delete(model)
|
69 |
-
if id :
|
70 |
-
delete_query = delete_query.where(model.id == id)
|
71 |
if filter_conditions:
|
72 |
delete_query = delete_query.where(*filter_conditions)
|
73 |
|
|
|
53 |
"""Delete an entry by ID with optional filter conditions."""
|
54 |
# Build the query to select the entry
|
55 |
query = select(model)
|
56 |
+
if id:
|
57 |
query = query.where(model.id == id)
|
|
|
58 |
if filter_conditions:
|
59 |
query = query.where(*filter_conditions)
|
60 |
|
|
|
64 |
return entry
|
65 |
|
66 |
# Build the delete query
|
67 |
+
delete_query = delete(model).where(model.id == id)
|
|
|
|
|
68 |
if filter_conditions:
|
69 |
delete_query = delete_query.where(*filter_conditions)
|
70 |
|
db/query/query_book.py
CHANGED
@@ -41,7 +41,6 @@ class BookQuery(BaseQuery):
|
|
41 |
join_conditions=join_conditions,
|
42 |
multiple=True,
|
43 |
)
|
44 |
-
print("result", result)
|
45 |
|
46 |
return result
|
47 |
|
|
|
41 |
join_conditions=join_conditions,
|
42 |
multiple=True,
|
43 |
)
|
|
|
44 |
|
45 |
return result
|
46 |
|
db/query/query_user_meta.py
CHANGED
@@ -6,7 +6,7 @@ from db.query.base_query import BaseQuery
|
|
6 |
class UserMetaQuery(BaseQuery):
|
7 |
def __init__(self, user):
|
8 |
super().__init__(user)
|
9 |
-
|
10 |
def get_user_meta_entries(self, db):
|
11 |
"""Fetch all user meta entries joined with metadata and category."""
|
12 |
join_models = [Metadata, Category]
|
@@ -14,7 +14,7 @@ class UserMetaQuery(BaseQuery):
|
|
14 |
User_Meta.metadata_id == Metadata.id,
|
15 |
Metadata.category_id == Category.id,
|
16 |
]
|
17 |
-
|
18 |
filter_conditions = [User_Meta.user_id == self.user_id]
|
19 |
|
20 |
result = self.get_with_joins(
|
@@ -23,10 +23,10 @@ class UserMetaQuery(BaseQuery):
|
|
23 |
join_models=join_models,
|
24 |
join_conditions=join_conditions,
|
25 |
filter_conditions=filter_conditions,
|
26 |
-
multiple=True
|
27 |
)
|
28 |
return result
|
29 |
-
|
30 |
def insert_user_meta_entries(self, db, metadata_ids):
|
31 |
"""Insert new user meta entries if they don't already exist."""
|
32 |
# Fetch existing metadata IDs for the user
|
@@ -58,14 +58,13 @@ class UserMetaQuery(BaseQuery):
|
|
58 |
"metadata_ids": new_metadata_ids, # Include only new metadata IDs in the result
|
59 |
}
|
60 |
|
|
|
61 |
def update_user_meta_entries(self, db, metadata_ids):
|
62 |
"""Update user meta entries: keep, delete, or add new entries based on metadata_ids."""
|
63 |
filter_conditions = [User_Meta.user_id == self.user_id]
|
64 |
-
|
65 |
# Fetch existing user meta entries
|
66 |
-
existing_user_meta = self.get(
|
67 |
-
db, model=User_Meta, filter_conditions=filter_conditions, multiple=True
|
68 |
-
)
|
69 |
existing_user_meta = [user_meta[0] for user_meta in existing_user_meta]
|
70 |
existing_meta_ids = [entry.metadata_id for entry in existing_user_meta]
|
71 |
|
@@ -80,10 +79,7 @@ class UserMetaQuery(BaseQuery):
|
|
80 |
|
81 |
# Delete entries that are no longer in the updated metadata_ids list
|
82 |
if metadata_to_delete:
|
83 |
-
db.query(User_Meta).filter(
|
84 |
-
User_Meta.user_id == self.user_id,
|
85 |
-
User_Meta.metadata_id.in_(metadata_to_delete),
|
86 |
-
).delete(synchronize_session=False)
|
87 |
|
88 |
# Add new entries for metadata that are not in the existing user meta
|
89 |
for meta_id in metadata_to_add:
|
@@ -91,7 +87,7 @@ class UserMetaQuery(BaseQuery):
|
|
91 |
self.add(db, new_entry)
|
92 |
|
93 |
db.commit()
|
94 |
-
|
95 |
return {
|
96 |
"status": "success",
|
97 |
"added_meta": list(metadata_to_add),
|
@@ -101,15 +97,10 @@ class UserMetaQuery(BaseQuery):
|
|
101 |
|
102 |
def delete_user_meta(self, db, metadata_id):
|
103 |
"""Delete user meta entries by metadata_id."""
|
104 |
-
filter_conditions = [
|
105 |
-
|
106 |
-
User_Meta.user_id == self.user_id,
|
107 |
-
]
|
108 |
self.delete(db, model=User_Meta, filter_conditions=filter_conditions)
|
109 |
-
return {
|
110 |
-
"status": "success",
|
111 |
-
"message": f"Book user with id {metadata_id} deleted successfully.",
|
112 |
-
}
|
113 |
|
114 |
def delete_all_user_meta(self, db):
|
115 |
"""Delete all user meta entries for a user."""
|
|
|
6 |
class UserMetaQuery(BaseQuery):
|
7 |
def __init__(self, user):
|
8 |
super().__init__(user)
|
9 |
+
|
10 |
def get_user_meta_entries(self, db):
|
11 |
"""Fetch all user meta entries joined with metadata and category."""
|
12 |
join_models = [Metadata, Category]
|
|
|
14 |
User_Meta.metadata_id == Metadata.id,
|
15 |
Metadata.category_id == Category.id,
|
16 |
]
|
17 |
+
|
18 |
filter_conditions = [User_Meta.user_id == self.user_id]
|
19 |
|
20 |
result = self.get_with_joins(
|
|
|
23 |
join_models=join_models,
|
24 |
join_conditions=join_conditions,
|
25 |
filter_conditions=filter_conditions,
|
26 |
+
multiple=True
|
27 |
)
|
28 |
return result
|
29 |
+
|
30 |
def insert_user_meta_entries(self, db, metadata_ids):
|
31 |
"""Insert new user meta entries if they don't already exist."""
|
32 |
# Fetch existing metadata IDs for the user
|
|
|
58 |
"metadata_ids": new_metadata_ids, # Include only new metadata IDs in the result
|
59 |
}
|
60 |
|
61 |
+
|
62 |
def update_user_meta_entries(self, db, metadata_ids):
|
63 |
"""Update user meta entries: keep, delete, or add new entries based on metadata_ids."""
|
64 |
filter_conditions = [User_Meta.user_id == self.user_id]
|
65 |
+
|
66 |
# Fetch existing user meta entries
|
67 |
+
existing_user_meta = self.get(db, model=User_Meta, filter_conditions=filter_conditions, multiple=True)
|
|
|
|
|
68 |
existing_user_meta = [user_meta[0] for user_meta in existing_user_meta]
|
69 |
existing_meta_ids = [entry.metadata_id for entry in existing_user_meta]
|
70 |
|
|
|
79 |
|
80 |
# Delete entries that are no longer in the updated metadata_ids list
|
81 |
if metadata_to_delete:
|
82 |
+
db.query(User_Meta).filter(User_Meta.user_id == self.user_id, User_Meta.metadata_id.in_(metadata_to_delete)).delete(synchronize_session=False)
|
|
|
|
|
|
|
83 |
|
84 |
# Add new entries for metadata that are not in the existing user meta
|
85 |
for meta_id in metadata_to_add:
|
|
|
87 |
self.add(db, new_entry)
|
88 |
|
89 |
db.commit()
|
90 |
+
|
91 |
return {
|
92 |
"status": "success",
|
93 |
"added_meta": list(metadata_to_add),
|
|
|
97 |
|
98 |
def delete_user_meta(self, db, metadata_id):
|
99 |
"""Delete user meta entries by metadata_id."""
|
100 |
+
filter_conditions = [User_Meta.metadata_id==metadata_id,
|
101 |
+
User_Meta.user_id==self.user_id]
|
|
|
|
|
102 |
self.delete(db, model=User_Meta, filter_conditions=filter_conditions)
|
103 |
+
return {"message": f"Book user with id {metadata_id} deleted successfully."}
|
|
|
|
|
|
|
104 |
|
105 |
def delete_all_user_meta(self, db):
|
106 |
"""Delete all user meta entries for a user."""
|
db/save_data.py
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
from databases import Database
|
2 |
-
import logging
|
3 |
-
from dotenv import load_dotenv
|
4 |
-
from db.repository import Repository
|
5 |
-
|
6 |
-
|
7 |
-
load_dotenv()
|
8 |
-
|
9 |
-
|
10 |
-
class InsertDatabase(Repository):
|
11 |
-
|
12 |
-
# Example function to insert data asynchronously
|
13 |
-
async def insert_data(self, params, category_id):
|
14 |
-
# SQL insert query with named placeholders
|
15 |
-
query = """
|
16 |
-
INSERT INTO metadata (title, category_id, author, year, publisher)
|
17 |
-
VALUES (:title, :category_id, :author, :year, :publisher)
|
18 |
-
"""
|
19 |
-
|
20 |
-
reference = {
|
21 |
-
"title": params["title"],
|
22 |
-
"category_id": category_id, # directly assign category_id
|
23 |
-
"author": params["author"],
|
24 |
-
"year": params["year"],
|
25 |
-
"publisher": params["publisher"]
|
26 |
-
}
|
27 |
-
|
28 |
-
|
29 |
-
print(reference)
|
30 |
-
try:
|
31 |
-
# Execute the query with the provided values
|
32 |
-
await self._exec(query, reference)
|
33 |
-
logging.info(
|
34 |
-
f"Data inserted successfully: {reference['title']}, {reference['author']}"
|
35 |
-
)
|
36 |
-
except Exception as e:
|
37 |
-
# Log any errors that occur during the database insert operation
|
38 |
-
logging.error(f"Failed to insert data: {e}")
|
39 |
-
raise # Re-raise the exception to allow further handling if needed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
db/update_data.py
DELETED
@@ -1,35 +0,0 @@
|
|
1 |
-
import logging
|
2 |
-
from db.repository import Repository, get_db_conn
|
3 |
-
|
4 |
-
# Setup logging (configure as needed)
|
5 |
-
logging.basicConfig(level=logging.INFO)
|
6 |
-
|
7 |
-
|
8 |
-
class UpdateDatabase(Repository):
|
9 |
-
async def update_record(self, reference):
|
10 |
-
print("update record", reference)
|
11 |
-
if "id" not in reference:
|
12 |
-
raise ValueError("The 'id' parameter is required.")
|
13 |
-
query = """
|
14 |
-
UPDATE metadata
|
15 |
-
SET title = :title,
|
16 |
-
category_id = :category_id,
|
17 |
-
author = :author,
|
18 |
-
year = :year,
|
19 |
-
publisher = :publisher
|
20 |
-
WHERE id = :id
|
21 |
-
"""
|
22 |
-
print(query)
|
23 |
-
|
24 |
-
print(reference)
|
25 |
-
|
26 |
-
try:
|
27 |
-
await self._exec(query, reference)
|
28 |
-
logging.info(
|
29 |
-
f"Record with id {reference['id']} updated successfully."
|
30 |
-
)
|
31 |
-
except Exception as e:
|
32 |
-
logging.error(
|
33 |
-
f"Error updating record with id {reference['id']}: {e}"
|
34 |
-
)
|
35 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
helper/bot_function.py
DELETED
File without changes
|
helper/db_function.py
DELETED
File without changes
|
requirements.txt
CHANGED
@@ -53,12 +53,6 @@ joblib==1.4.2
|
|
53 |
jose==1.0.0
|
54 |
jsonpatch==1.33
|
55 |
jsonpointer==3.0.0
|
56 |
-
kubernetes==30.1.0
|
57 |
-
langchain==0.3.0
|
58 |
-
langchain-community==0.3.0
|
59 |
-
langchain-core==0.3.1
|
60 |
-
langchain-openai==0.2.0
|
61 |
-
langchain-text-splitters==0.3.0
|
62 |
langchainhub==0.1.21
|
63 |
langfuse==2.48.1
|
64 |
langsmith==0.1.123
|
|
|
53 |
jose==1.0.0
|
54 |
jsonpatch==1.33
|
55 |
jsonpointer==3.0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
langchainhub==0.1.21
|
57 |
langfuse==2.48.1
|
58 |
langsmith==0.1.123
|
research/delete.ipynb
CHANGED
@@ -33,7 +33,7 @@
|
|
33 |
},
|
34 |
{
|
35 |
"cell_type": "code",
|
36 |
-
"execution_count":
|
37 |
"metadata": {},
|
38 |
"outputs": [
|
39 |
{
|
@@ -54,17 +54,19 @@
|
|
54 |
"\n",
|
55 |
"load_dotenv()\n",
|
56 |
"\n",
|
57 |
-
"api_key = os.getenv(\"PINECONE_API_KEY\")\n",
|
58 |
-
"\n",
|
59 |
"pc = Pinecone(api_key=api_key)\n",
|
60 |
-
"index = pc.Index(\"summarizer-semantic-index\")\n",
|
|
|
61 |
"\n",
|
62 |
-
"random_vector = [random.uniform(0, 1) for _ in range(1536)]\n",
|
|
|
63 |
"results = index.query(\n",
|
64 |
" vector=random_vector,\n",
|
65 |
" top_k=10000,\n",
|
66 |
" filter={\n",
|
67 |
-
" \"
|
68 |
" },\n",
|
69 |
")\n",
|
70 |
"\n",
|
|
|
33 |
},
|
34 |
{
|
35 |
"cell_type": "code",
|
36 |
+
"execution_count": null,
|
37 |
"metadata": {},
|
38 |
"outputs": [
|
39 |
{
|
|
|
54 |
"\n",
|
55 |
"load_dotenv()\n",
|
56 |
"\n",
|
57 |
+
"# api_key = os.getenv(\"PINECONE_API_KEY\")\n",
|
58 |
+
"api_key = \"pcsk_aZM8H_P9cK1nfUghBNJfiAhvRM6zgfgiBsHhtJDwydZaXZp47pKSQBFP6J7rmVPwqDYHW\"\n",
|
59 |
"pc = Pinecone(api_key=api_key)\n",
|
60 |
+
"# index = pc.Index(\"summarizer-semantic-index\")\n",
|
61 |
+
"index = pc.Index(\"multimedika\")\n",
|
62 |
"\n",
|
63 |
+
"# random_vector = [random.uniform(0, 1) for _ in range(1536)]\n",
|
64 |
+
"random_vector = [random.uniform(0, 1) for _ in range(768)]\n",
|
65 |
"results = index.query(\n",
|
66 |
" vector=random_vector,\n",
|
67 |
" top_k=10000,\n",
|
68 |
" filter={\n",
|
69 |
+
" \"url\": {\"$eq\": \"test\"},\n",
|
70 |
" },\n",
|
71 |
")\n",
|
72 |
"\n",
|
research/llama_parse.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|