Spaces:

Multimedika
/

Bot_Development

Runtime error

App Files Community

dsmultimedika committed on Dec 27, 2024

Commit

0767396

1 Parent(s): d879d77

fix : update code

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +11 -1
api/events.py +1 -2
api/function.py +10 -13
api/router/book.py +23 -23
api/router/book_collection.py +5 -28
api/router/bot.py +0 -92
api/router/bot_general.py +14 -4
api/router/bot_one.py +3 -2
api/router/bot_specific.py +2 -1
api/router/category.py +3 -2
api/router/testing.py +71 -45
api/router/topic.py +0 -69
api/router/user.py +99 -28
api/util/util.py +0 -0
app.py +2 -2
config.py +1 -1
controller/__init__.py +0 -0
controller/book_collection_controller.py +0 -0
controller/book_controller.py +0 -0
controller/bot_general_controller.py +0 -0
controller/bot_one_controller.py +0 -0
controller/bot_specific_controller.py +0 -0
controller/category_controller.py +0 -0
controller/user_controller.py +0 -0
core/book_enabler/__init__.py +0 -0
core/chat/bot_service.py +25 -3
core/chat/bot_service_multimodal.py +169 -0
core/chat/chatstore.py +47 -40
core/chat/engine.py +45 -14
core/chat/messaging.py +0 -63
core/chat/schema.py +0 -162
core/module_creator/__init__.py +0 -0
core/multimodal.py +64 -0
core/parser.py +30 -4
core/prompt.py +150 -2
db/database.py +0 -3
db/db.py +0 -124
db/delete_data.py +0 -22
db/fetching.py +1 -4
db/get_data.py +0 -69
db/query/base_query.py +2 -5
db/query/query_book.py +0 -1
db/query/query_user_meta.py +12 -21
db/save_data.py +0 -39
db/update_data.py +0 -35
helper/bot_function.py +0 -0
helper/db_function.py +0 -0
requirements.txt +0 -6
research/delete.ipynb +8 -6
research/llama_parse.ipynb +0 -0

.gitignore CHANGED Viewed

@@ -398,4 +398,14 @@ FodyWeavers.xsd
 *.sln.iml
 .env
-*.pem

 *.sln.iml
 .env
+*.pem
+*.ipynb
+*.json
+# Ignore directories and specific folders
+/research/
+# Auto Generated PWA files
+**/public/sw.js
+**/public/workbox-*.js

api/events.py CHANGED Viewed

@@ -7,8 +7,6 @@ from db.database import engine
 from db.models import Base
 from llama_index.core import set_global_handler
 load_dotenv()
@@ -23,6 +21,7 @@ async def startup() -> None:
 async def shutdown() -> None:
     pass
 def register_events(app: FastAPI) -> FastAPI:
     app.add_event_handler("startup", startup)
     app.add_event_handler("shutdown", shutdown)

 from db.models import Base
 from llama_index.core import set_global_handler
 load_dotenv()
 async def shutdown() -> None:
     pass
 def register_events(app: FastAPI) -> FastAPI:
     app.add_event_handler("startup", startup)
     app.add_event_handler("shutdown", shutdown)

api/function.py CHANGED Viewed

@@ -13,7 +13,7 @@ from script.document_uploader import Uploader
 from script.vector_db import IndexManager
 from service.aws_loader import Loader
 from service.dto import BotResponseStreaming
 load_dotenv()
@@ -21,7 +21,7 @@ load_dotenv()
 logging.basicConfig(level=logging.INFO)
-async def data_ingestion(reference, file: UploadFile) -> Any:
     try:
         # Assuming you have a Langfuse callback handler
         langfuse_callback_handler = LlamaIndexCallbackHandler()
@@ -29,33 +29,31 @@ async def data_ingestion(reference, file: UploadFile) -> Any:
             user_id="admin_book_uploaded",
         )
-        uploader = Uploader(reference, file)
         nodes_with_metadata, file_stream = await uploader.process_documents()
         # Build indexes using IndexManager
         index = IndexManager()
         index.build_indexes(nodes_with_metadata)
-        # # Upload to AWS
         file_name = f"{reference['title']}"
         aws_loader = Loader()
-        # file_obj = file
         aws_loader.upload_to_s3(file_stream, file_name)
         return json.dumps(
             {"status": "success", "message": "Vector Index loaded successfully."}
         )
     except Exception as e:
-        # Log the error and raise HTTPException for FastAPI
         logging.error("An error occurred in data ingestion: %s", e)
-        return JSONResponse(
-            status_code=500,
-            content="An internal server error occurred in data ingestion.",
-        )
 async def generate_streaming_completion(user_request, session_id):
     try:
@@ -64,7 +62,6 @@ async def generate_streaming_completion(user_request, session_id):
         # Load existing indexes
         index = index_manager.load_existing_indexes()
         # Retrieve the chat engine with the loaded index
         chat_engine = engine.get_chat_engine(index, session_id)
         # Generate completion response

 from script.vector_db import IndexManager
 from service.aws_loader import Loader
 from service.dto import BotResponseStreaming
+from utils.error_handlers import handle_exception
 load_dotenv()
 logging.basicConfig(level=logging.INFO)
+async def data_ingestion(reference, file: UploadFile, lang: str = "en") -> Any:
     try:
         # Assuming you have a Langfuse callback handler
         langfuse_callback_handler = LlamaIndexCallbackHandler()
             user_id="admin_book_uploaded",
         )
+        uploader = Uploader(reference, file, lang)
         nodes_with_metadata, file_stream = await uploader.process_documents()
+        if isinstance(nodes_with_metadata, JSONResponse):
+            return nodes_with_metadata  # Return the error response directly
         # Build indexes using IndexManager
         index = IndexManager()
         index.build_indexes(nodes_with_metadata)
+        # Upload AWS
         file_name = f"{reference['title']}"
         aws_loader = Loader()
         aws_loader.upload_to_s3(file_stream, file_name)
         return json.dumps(
             {"status": "success", "message": "Vector Index loaded successfully."}
         )
     except Exception as e:
+        # Log the error
         logging.error("An error occurred in data ingestion: %s", e)
+        # Use handle_exception for structured error handling
+        return handle_exception(e)
 async def generate_streaming_completion(user_request, session_id):
     try:
         # Load existing indexes
         index = index_manager.load_existing_indexes()
         # Retrieve the chat engine with the loaded index
         chat_engine = engine.get_chat_engine(index, session_id)
         # Generate completion response

api/router/book.py CHANGED Viewed

@@ -18,6 +18,7 @@ from config import MYSQL_CONFIG
 from utils.error_handlers import handle_exception
 from script.vector_db import IndexManager
 from service.dto import MetadataResponse
 from sqlalchemy.orm import Session
 from sqlalchemy.future import select
@@ -37,9 +38,7 @@ async def get_metadata(user: user_dependency, db: db_dependency):
     try:
         # Join Metadata with Category to get the category name
         fetching = DataFetching(user, db)
-        # print(fetching)
         metadata_fetching = fetching.metadata_fetching()
-        # print(metadata_fetching)
         # Transform results into MetadataResponse model with optional thumbnail handling
         return metadata_fetching
@@ -58,13 +57,15 @@ async def upload_file(
     year: int = Form(...),
     publisher: str = Form(...),
     file: UploadFile = File(...),
     thumbnail: Optional[UploadFile] = File(None),
 ):
     auth_response = check_admin_authentication(user)
     if auth_response:
         return auth_response
     # Query the category based on category_id
     category_query = CategoryQuery(user)
@@ -80,28 +81,32 @@ async def upload_file(
         }
         # Process the file and handle data ingestion
-        response = await data_ingestion(reference, file)
         # Create a new Metadata object
         book_query = BookQuery(user)
         book_query.add_book(db, title, author, category_id, year, publisher)
         logging.info("Database Inserted")
         return {
-            "status": "success",
             "filename": file.filename,
             "response": response,
             "info": "upload file successfully",
         }
     except Exception as e:
-        return {
-            "status": "error",
-            "filename": "",
-            "response": "",
-            "info": "upload file failed",
-            "error_message":handle_exception(e)
-        }
 @router.put("/book/{metadata_id}")
 async def update_metadata(
@@ -153,6 +158,7 @@ async def update_metadata(
         # Update existing metadata entry
         metadata = db.query(Metadata).filter(Metadata.id == metadata_id).first()
         if not metadata:
             return JSONResponse(status_code=404, content="Metadata not found")
@@ -160,10 +166,11 @@ async def update_metadata(
         updated_metadata = book_query.update_metadata_entry(
             db, metadata_id, title, author, category_id, year, publisher
         )
         updated_category = category_query.get_category(db, updated_metadata.category_id)
         return MetadataResponse(
-            status="success",
             id=metadata_id,
             title=updated_metadata.title,
             author=updated_metadata.author,
@@ -179,10 +186,7 @@ async def update_metadata(
         )
     except Exception as e:
-        return {
-            "status":"error",
-            "error_message":handle_exception(e)
-        }
 @router.delete("/book/{metadata_id}")
 async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id: int):
@@ -203,12 +207,8 @@ async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id:
         db.delete(metadata)
         db.commit()
-        return {"status": "delete successfully"}
     except Exception as e:
-        return {
-            "status": "error",
-            "message": "delete failed",
-            "error_message": handle_exception(e)
-        }

 from utils.error_handlers import handle_exception
 from script.vector_db import IndexManager
 from service.dto import MetadataResponse
+from service.aws_loader import Loader
 from sqlalchemy.orm import Session
 from sqlalchemy.future import select
     try:
         # Join Metadata with Category to get the category name
         fetching = DataFetching(user, db)
         metadata_fetching = fetching.metadata_fetching()
         # Transform results into MetadataResponse model with optional thumbnail handling
         return metadata_fetching
     year: int = Form(...),
     publisher: str = Form(...),
     file: UploadFile = File(...),
+    lang: str = Form(None),
     thumbnail: Optional[UploadFile] = File(None),
 ):
     auth_response = check_admin_authentication(user)
     if auth_response:
         return auth_response
+    # Restrict `lang` to only "id" or "en"
+    lang = lang if lang in {"id", "en"} else "en"
     # Query the category based on category_id
     category_query = CategoryQuery(user)
         }
         # Process the file and handle data ingestion
+        response = await data_ingestion(reference, file, lang)
+        if isinstance(response, JSONResponse):
+            return response  # Return the error response directly
+        if thumbnail:
+            file_name = f"{reference['title']}"
+            aws_loader = Loader()
+            ekstensi_file = file.filename.split(".")[-1].lower()
+            aws_loader.upload_image_to_s3(file=thumbnail, custom_name=f"{file_name}.{ekstensi_file}")
         # Create a new Metadata object
         book_query = BookQuery(user)
         book_query.add_book(db, title, author, category_id, year, publisher)
         logging.info("Database Inserted")
         return {
             "filename": file.filename,
             "response": response,
             "info": "upload file successfully",
         }
     except Exception as e:
+        return handle_exception(e)
 @router.put("/book/{metadata_id}")
 async def update_metadata(
         # Update existing metadata entry
         metadata = db.query(Metadata).filter(Metadata.id == metadata_id).first()
+        print(metadata)
         if not metadata:
             return JSONResponse(status_code=404, content="Metadata not found")
         updated_metadata = book_query.update_metadata_entry(
             db, metadata_id, title, author, category_id, year, publisher
         )
+        print(updated_metadata)
         updated_category = category_query.get_category(db, updated_metadata.category_id)
+        print(updated_category)
         return MetadataResponse(
             id=metadata_id,
             title=updated_metadata.title,
             author=updated_metadata.author,
         )
     except Exception as e:
+        return handle_exception(e)
 @router.delete("/book/{metadata_id}")
 async def delete_metadata(user: user_dependency, db: db_dependency, metadata_id: int):
         db.delete(metadata)
         db.commit()
+        return {"Status": "delete successfully"}
     except Exception as e:
+        return handle_exception(e)

api/router/book_collection.py CHANGED Viewed

@@ -30,11 +30,7 @@ async def get_book_collection(user: user_dependency, db: db_dependency):
             "book_collection": book_collection,
         }
     except Exception as e:
-        return {
-            "status": "error",
-            "book_collection": [],
-            "error_message": handle_exception(e)
-        }
 @router.post("/book_collection")
@@ -52,12 +48,7 @@ async def request_book_collection(
         return user_meta_query.insert_user_meta_entries(db, metadata_ids)
     except Exception as e:
-        return {
-            "status": "error",
-            "message": "User meta entries failed to added.",
-            "metadata_ids": [],  # Include the metadata IDs in the result
-            "error_message":handle_exception(e)
-        }
 @router.put("/book_collection")
@@ -74,13 +65,7 @@ async def update_book_collection(
         return user_meta_query.update_user_meta_entries(db, metadata_ids)
     except Exception as e:
-        return {
-            "status": "error",
-            "added_meta": [],
-            "deleted_meta": [],
-            "kept_meta": [],
-            "message":handle_exception(e)
-        }
 @router.delete("/book_collection/{metadata_id}")
@@ -98,11 +83,7 @@ async def delete_book_collection(
             db, metadata_id=metadata_id
         )
     except Exception as e:
-        return {
-            "status": "error",
-            "message": f"Book user with id {metadata_id} deleted successfully.",
-            "error_message": handle_exception(e)
-            }
 @router.delete("/all_collections")
@@ -121,8 +102,4 @@ async def delete_all_book(user: user_dependency, db: db_dependency):
             "message": f"Deleted book collection for user {user.get('id')}",
         }
     except Exception as e:
-        return {
-            "status": "error",
-            "message": f"Delete failed for user {user.get('id')}",
-            "error_message": handle_exception(e)
-        }

             "book_collection": book_collection,
         }
     except Exception as e:
+        return handle_exception(e)
 @router.post("/book_collection")
         return user_meta_query.insert_user_meta_entries(db, metadata_ids)
     except Exception as e:
+        return handle_exception(e)
 @router.put("/book_collection")
         return user_meta_query.update_user_meta_entries(db, metadata_ids)
     except Exception as e:
+        return handle_exception(e)
 @router.delete("/book_collection/{metadata_id}")
             db, metadata_id=metadata_id
         )
     except Exception as e:
+        return handle_exception(e)
 @router.delete("/all_collections")
             "message": f"Deleted book collection for user {user.get('id')}",
         }
     except Exception as e:
+        return handle_exception(e)

api/router/bot.py DELETED Viewed

@@ -1,92 +0,0 @@
-from fastapi import APIRouter, HTTPException, Depends
-from service.dto import UserPromptRequest, BotResponse
-from core.chat.chatstore import ChatStore
-from api.function import (
-    generate_streaming_completion,
-    generate_completion_non_streaming,
-)
-from sse_starlette.sse import EventSourceResponse
-from utils.utils import generate_uuid
-router = APIRouter(tags=["Bot"])
-def get_chat_store():
-    return ChatStore()
-@router.post("/bot/new")
-async def create_new_session():
-    session_id = generate_uuid()
-    return {"session_id" : session_id}
-@router.get("/bot/{session_id}")
-async def get_session_id(session_id: str, chat_store: ChatStore = Depends(get_chat_store)):
-    chat_history = chat_store.get_messages(session_id)
-    if not chat_history:
-        raise HTTPException(status_code=404, detail="Session not found or empty.")
-    return chat_history
-@router.get("/bot")
-async def get_all_session_ids():
-    try:
-        chat_store = ChatStore()
-        all_keys = chat_store.get_keys()
-        print(all_keys)
-        return all_keys
-    except Exception as e:
-    # Log the error and raise HTTPException for FastAPI
-        print(f"An error occurred in update data.: {e}")
-        raise HTTPException(
-            status_code=400, detail="the error when get all session ids"
-        )
-@router.post("/bot/{session_id}")
-async def bot_generator_general(user_prompt_request: UserPromptRequest):
-    if user_prompt_request.streaming:
-        return EventSourceResponse(
-            generate_streaming_completion(
-                user_prompt_request.prompt, user_prompt_request.streaming
-            )
-        )
-    else:
-        response, raw_references, references, metadata, scores = (
-            generate_completion_non_streaming(
-                user_prompt_request.session_id, user_prompt_request.prompt, user_prompt_request.streaming
-            )
-        )
-        return BotResponse(
-            content=response,
-            raw_references=raw_references,
-            references=references,
-            metadata=metadata,
-            scores=scores,
-        )
-@router.post("/bot/{category_id}/{title}") #Ganti router
-async def bot_generator_spesific(
-    category_id: int, title: str, user_prompt_request: UserPromptRequest
-):
-    pass
-@router.delete("/bot/{session_id}")
-async def delete_bot(session_id: str, chat_store: ChatStore = Depends(get_chat_store)):
-    try:
-        chat_store.delete_messages(session_id)
-        return {"info": f"Delete {session_id} successful"}
-    except Exception as e:
-        # Log the error and raise HTTPException for FastAPI
-        print(f"An error occurred in update data.: {e}")
-        raise HTTPException(
-            status_code=400, detail="the error when deleting message"
-        )
-@router.get("/bot/{category_id}/{title}")
-async def get_favourite_data(category_id: int, title: str, human_template):
-    pass

api/router/bot_general.py CHANGED Viewed

@@ -27,7 +27,10 @@ def get_chat_store():
 @router.post("/bot_general/new")
-async def create_session_general():
     session_id = generate_uuid()
     return {"session_id": session_id}
@@ -54,9 +57,12 @@ async def get_session_id(
 @router.post("/bot/{session_id}")
 async def bot_generator_general(
-    session_id: str, user_prompt_request: UserPromptRequest
 ):
     langfuse_callback_handler = LlamaIndexCallbackHandler()
     langfuse_callback_handler.set_trace_params(user_id="guest", session_id=session_id)
@@ -77,8 +83,12 @@ async def bot_generator_general(
 @router.delete("/bot/{session_id}")
 async def delete_bot(
-    db: db_dependency, session_id: str, chat_store: ChatStore = Depends(get_chat_store)
 ):
     try:
         chat_store.delete_messages(session_id)
         # Delete session from database

 @router.post("/bot_general/new")
+async def create_session_general(user: user_dependency):
+    auth_response = check_user_authentication(user)
+    if auth_response:
+        return auth_response
     session_id = generate_uuid()
     return {"session_id": session_id}
 @router.post("/bot/{session_id}")
 async def bot_generator_general(
+    user: user_dependency,session_id: str, user_prompt_request: UserPromptRequest
 ):
+    auth_response = check_user_authentication(user)
+    if auth_response:
+        return auth_response
     langfuse_callback_handler = LlamaIndexCallbackHandler()
     langfuse_callback_handler.set_trace_params(user_id="guest", session_id=session_id)
 @router.delete("/bot/{session_id}")
 async def delete_bot(
+    user: user_dependency,db: db_dependency, session_id: str, chat_store: ChatStore = Depends(get_chat_store)
 ):
+    auth_response = check_user_authentication(user)
+    if auth_response:
+        return auth_response
     try:
         chat_store.delete_messages(session_id)
         # Delete session from database

api/router/bot_one.py CHANGED Viewed

@@ -6,7 +6,8 @@ import pytz
 from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
 from datetime import datetime
 from core.chat.chatstore import ChatStore
-from core.chat.bot_service import ChatCompletionService
 from db.database import get_db
 from db.models import Session_Publisher
 from db.query.query_book import BookQuery
@@ -139,7 +140,7 @@ async def get_all_session_bot_one(
                 "id": session.id,
                 "bot_name": session.bot_name,
                 "updated_at": str(session.updated_at),
-                "last_message": chat_store.get_last_message_mongodb(session.id),
             }
             for session in sessions
         ]

 from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
 from datetime import datetime
 from core.chat.chatstore import ChatStore
+# from core.chat.bot_service import ChatCompletionService
+from core.chat.bot_service_multimodal import ChatCompletionService
 from db.database import get_db
 from db.models import Session_Publisher
 from db.query.query_book import BookQuery
                 "id": session.id,
                 "bot_name": session.bot_name,
                 "updated_at": str(session.updated_at),
+                "last_message": chat_store.get_last_message_mongodb(session.id)
             }
             for session in sessions
         ]

api/router/bot_specific.py CHANGED Viewed

@@ -10,7 +10,8 @@ from sqlalchemy.exc import SQLAlchemyError, NoResultFound
 from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
 from core.chat.chatstore import ChatStore
-from core.chat.bot_service import ChatCompletionService
 from db.database import get_db
 from db.models import Bot_Meta, Bot, Metadata
 from db.models import Session as SessionModel

 from service.dto import UserPromptRequest, BotResponse, BotCreateRequest
 from core.chat.chatstore import ChatStore
+# from core.chat.bot_service import ChatCompletionService
+from core.chat.bot_service_multimodal import ChatCompletionService
 from db.database import get_db
 from db.models import Bot_Meta, Bot, Metadata
 from db.models import Session as SessionModel

api/router/category.py CHANGED Viewed

@@ -85,8 +85,9 @@ async def create_category(user: user_dependency, db: db_dependency, category: Ca
         # Check if category already exists
         category_query = CategoryQuery(user)
         existing_category = category_query.get_existing_category(db, category.category_name)
-        if not isinstance(existing_category,JSONResponse):
-            return JSONResponse(status_code=400, content="Category already exists")
         # Add category
         category_query.add_category(db, category.category_name)

         # Check if category already exists
         category_query = CategoryQuery(user)
         existing_category = category_query.get_existing_category(db, category.category_name)
+        print(existing_category)
+        if existing_category:  # Check if the category already exists
+            return JSONResponse(status_code=400, content={"error": "Category already exists"})
         # Add category
         category_query.add_category(db, category.category_name)

api/router/testing.py CHANGED Viewed

@@ -1,5 +1,4 @@
-from fastapi import FastAPI, HTTPException, Depends, Form
-from fastapi.security import OAuth2PasswordBearer
 import httpx
 import os
 from dotenv import load_dotenv
@@ -9,63 +8,88 @@ load_dotenv()
 app = FastAPI()
-# Bearer token for API authentication
 BEARER_TOKEN = os.getenv("MEDUCINE_API_BEARER_TOKEN")
-# Base URL for the Meducine API
 BASE_URL = os.getenv("BASE_URL")
-# OAuth2PasswordBearer provides the token as a dependency
-oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/login")
-@app.post("/login")
 async def login(email: str = Form(...), password: str = Form(...)):
-    async with httpx.AsyncClient() as client:
-        try:
-            response = await client.post(
-                f"{BASE_URL}/actions/meducine-restapi/auth/login",
-                data={"email": email, "password": password},
-                headers={"Authorization": f"Bearer {BEARER_TOKEN}"}
-            )
-            response.raise_for_status()  # Raise an error for bad responses (4xx or 5xx)
-            return handle_response(response)  # Assuming this function formats the response correctly
-        except httpx.HTTPStatusError as e:
-            raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
-@app.post("/actions/meducine-restapi/auth/logout")
-async def logout(email: str = Form(...), password: str = Form(...)):
-    async with httpx.AsyncClient() as client:
-        response = await client.post(
-            f"{BASE_URL}/actions/meducine-restapi/auth/logout",
             data={"email": email, "password": password},
-            headers={"Authorization": f"Bearer {BEARER_TOKEN}"}
         )
-    return handle_response(response)
-@app.get("/actions/meducine-restapi/auth/identity")
-async def get_identity(token: str = Depends(oauth2_scheme)):
-    async with httpx.AsyncClient() as client:
-        response = await client.get(
-            f"{BASE_URL}/actions/meducine-restapi/auth/identity",
-            headers={"Authorization": f"Bearer {token}"}
         )
-    return handle_response(response)
-@app.get("/actions/meducine-restapi/user/has-premium-access")
-async def check_premium_access(feature: str, token: str = Depends(oauth2_scheme)):
-    async with httpx.AsyncClient() as client:
-        response = await client.get(
-            f"{BASE_URL}/actions/meducine-restapi/user/has-premium-access",
             params={"feature": feature},
-            headers={"Authorization": f"Bearer {token}"}
         )
-    return handle_response(response)
 def handle_response(response: httpx.Response):
     """
-    Handles the response from the Meducine API, returning appropriate responses based on status codes.
     """
     if response.status_code in range(200, 300):
         return response.json()  # Successful request
@@ -76,6 +100,8 @@ def handle_response(response: httpx.Response):
     else:
         raise HTTPException(status_code=500, detail="Unexpected error")
 # Run the application
 if __name__ == "__main__":
     import uvicorn

+from fastapi import FastAPI, HTTPException, Form
 import httpx
 import os
 from dotenv import load_dotenv
 app = FastAPI()
+# Bearer token and base URL for external API (from environment variables)
 BEARER_TOKEN = os.getenv("MEDUCINE_API_BEARER_TOKEN")
 BASE_URL = os.getenv("BASE_URL")
+@app.post("/actions/meducine-restapi/auth/login")
 async def login(email: str = Form(...), password: str = Form(...)):
+    """
+    Handles login by sending a request to the external API with the static Bearer token.
+    Even though it simulates a login, it uses the static Bearer token for authentication.
+    """
+    try:
+        # Send login request (simulates login but uses static Bearer token)
+        response = await make_request(
+            url=f"{BASE_URL}/actions/meducine-restapi/auth/login",
             data={"email": email, "password": password},
+            method="POST"
         )
+        return response
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Login failed: {str(e)}")
+@app.post("/actions/meducine-restapi/auth/logout")
+async def logout(email: str = Form(...)):
+    """
+    Handles logout using the static Bearer token.
+    """
+    try:
+        # Simulates logging out but uses the static Bearer token
+        response = await make_request(
+            url=f"{BASE_URL}/actions/meducine-restapi/auth/logout",
+            data={"email": email},
+            method="POST"
         )
+        return {"message": "Logout successful", "response": response}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Logout failed: {str(e)}")
+@app.post("/meducine-restapi/user/has-premium-access")
+async def check_premium_access(feature: str, email: str = Form(...), password: str = Form(...)):
+    """
+    Checks if the user has premium access to a feature, using the static Bearer token for authentication.
+    """
+    try:
+        response = await make_request(
+            url=f"{BASE_URL}/actions/meducine-restapi/user/has-premium-access",
+            data={"email": email, "password": password},
             params={"feature": feature},
+            method="POST"
         )
+        return response
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Premium access check failed: {str(e)}")
+async def make_request(url: str, data: dict = None, method: str = "GET", params: dict = None):
+    """
+    Helper function to make an HTTP request to the external API with the static Bearer token.
+    """
+    headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
+    async with httpx.AsyncClient() as client:
+        try:
+            if method == "POST":
+                response = await client.post(url, data=data, params=params, headers=headers)
+            elif method == "GET":
+                response = await client.get(url, params=params, headers=headers)
+            else:
+                raise HTTPException(status_code=405, detail="Method not allowed")
+            response.raise_for_status()  # Raise exception for 4xx or 5xx errors
+            return handle_response(response)
+        except httpx.HTTPStatusError as e:
+            raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
 def handle_response(response: httpx.Response):
     """
+    Handles the API response, returning JSON data or raising exceptions based on status codes.
     """
     if response.status_code in range(200, 300):
         return response.json()  # Successful request
     else:
         raise HTTPException(status_code=500, detail="Unexpected error")
 # Run the application
 if __name__ == "__main__":
     import uvicorn

api/router/topic.py DELETED Viewed

@@ -1,69 +0,0 @@
-from fastapi import Form, APIRouter, File, UploadFile, HTTPException, Request
-from db.repository import get_db_conn
-from db.get_data import GetDatabase
-from db.save_data import InsertDatabase
-from config import MYSQL_CONFIG
-from api.function import data_ingestion, get_data, delete_data, update_data
-from script.vector_db import IndexManager
-from service.dto import MetadataRequest
-router = APIRouter(tags=["Topics"])
-db_conn = get_db_conn(MYSQL_CONFIG)
-get_database = GetDatabase(db_conn)
-index_manager = IndexManager()
-@router.post("/topic")
-async def upload_file(
-    title: str = Form(...),
-    author: str = Form(...),
-    category: str = Form(...),
-    year: int = Form(...),
-    publisher: str = Form(...),
-    file: UploadFile = File(...),
-    # content_table: UploadFile = File(...)
-):
-    reference = {
-        "title": title,
-        "author": author,
-        "category": category,
-        "year": year,
-        "publisher": publisher,
-    }
-    # response = await data_ingestion(db_conn, reference, file, content_table)
-    response = await data_ingestion(db_conn, reference, file)
-    return {"filename": file.filename, "response": response}
-@router.get("/topic")
-async def get_metadata():
-    results = await get_data(db_conn)
-    return results
-@router.put("/topic/{id}")
-async def update_metadata(id: int, reference: MetadataRequest):
-    try :
-        old_reference = await get_database.get_data_by_id(id)
-        index_manager.update_vector_database(old_reference, reference)
-        return await update_data(id, reference, db_conn)
-    except Exception as e:
-        raise HTTPException(status_code=500, detail="An error occurred while updating metadata")
-@router.delete("/topic/{id}")
-async def delete_metadata(id: int):
-    try:
-        old_reference = await get_database.get_data_by_id(id)
-        index_manager.delete_vector_database(old_reference)
-        return await delete_data(id, db_conn)
-    except Exception as e:
-        print(e)
-        raise HTTPException(status_code=500, detail="An error occurred while delete metadata")

api/router/user.py CHANGED Viewed

@@ -1,3 +1,6 @@
 from datetime import timedelta
 from typing import Annotated
@@ -12,6 +15,12 @@ from db.database import get_db
 from api.auth import get_current_user, create_access_token
 from service.dto import CreateUserRequest, UserVerification, Token
 router = APIRouter(tags=["User"])
@@ -23,19 +32,82 @@ user_dependency = Annotated[dict, Depends(get_current_user)]
 ACCESS_TOKEN_EXPIRE_MINUTES = 43200
 @router.post("/login", response_model=Token)
 async def login_for_access_token(
     login_data: Annotated[OAuth2PasswordRequestForm, Depends()],
     db: Session = Depends(get_db),
 ):
-    user = db.query(User).filter(User.username == login_data.username).first()
-    if not user or not bcrypt_context.verify(login_data.password, user.password_hash):
-        return JSONResponse(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            content="Incorrect username or password",
-            headers={"WWW-Authenticate": "Bearer"},
         )
     try:
         access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
@@ -52,7 +124,7 @@ async def login_for_access_token(
     except Exception as e:
         print(e)
-        return JSONResponse(status_code=500, content="An error occuring when login")
 @router.get("/login", response_model=dict)
@@ -90,7 +162,6 @@ async def get_all_users(user: user_dependency, db: Session = Depends(get_db)):
     ]
-@router.post("/register")
 async def register_user(db: db_dependency, create_user_request: CreateUserRequest):
     existing_user = (
         db.query(User).filter(User.email == create_user_request.email).first()
@@ -122,27 +193,27 @@ async def register_user(db: db_dependency, create_user_request: CreateUserReques
         )
-@router.post("/forgot_password")
-async def forget_password():
-    pass
-@router.post("/change_password")
-async def change_password(
-    user: user_dependency, db: db_dependency, user_verification: UserVerification
-):
-    if user is None:
-        return JSONResponse(status_code=401, content="Authentication Failed")
-    user_model = db.query(User).filter(User.id == user.get("id")).first()
-    if not bcrypt_context.verify(
-        user_verification.password, user_model.hashed_password
-    ):
-        return JSONResponse(status_code=401, content="Error on password change")
-    user_model.hashed_password = bcrypt_context.hash(user_verification.new_password)
-    db.add(user_model)
-    db.commit()
-    db.refresh(user_model)
-    return {"message": "User's password successfully changed", "user_id": user_model.id}

+import os
+from dotenv import load_dotenv
 from datetime import timedelta
 from typing import Annotated
 from api.auth import get_current_user, create_access_token
 from service.dto import CreateUserRequest, UserVerification, Token
+from collections import Counter
+from time import time
+load_dotenv()
 router = APIRouter(tags=["User"])
 ACCESS_TOKEN_EXPIRE_MINUTES = 43200
+# Rate-limiting config
+FAILED_ATTEMPT_LIMIT = 3
+BLOCK_TIME_SECONDS = 300  # Block for 5 minutes
+# In-memory tracking for failed attempts
+failed_attempts = Counter()
+blocked_users = {}
 @router.post("/login", response_model=Token)
 async def login_for_access_token(
     login_data: Annotated[OAuth2PasswordRequestForm, Depends()],
     db: Session = Depends(get_db),
 ):
+    username = login_data.username
+    # Check if user is blocked
+    if username in blocked_users:
+        block_until = blocked_users[username]
+        if time() < block_until:
+            return JSONResponse(
+                status_code=status.HTTP_403_FORBIDDEN,
+                content=f"Too many failed attempts. Try again after {int(block_until - time())} seconds.",
+            )
+        else:
+            # Unblock the user after the time period
+            del blocked_users[username]
+            del failed_attempts[username]
+    user = db.query(User).filter(User.username == username).first()
+    if not user:
+        # Automatically register the user
+        create_user_request = CreateUserRequest(
+            name=login_data.username,
+            username=login_data.username,
+            email=login_data.username,
+            password=os.getenv("USER_PASSWORD"),  # Replace with a generated or temporary password
+            role_id=2,
+        )
+        registration_response = await register_user(db, create_user_request)
+        if isinstance(registration_response, JSONResponse):
+            return registration_response  # Return error response if registration failed
+        # Retrieve the newly created user after successful registration
+        user = db.query(User).filter(User.username == username).first()
+        if not user:
+            return JSONResponse(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                content="User registration failed unexpectedly."
+            )
+        correct_password = (
+            bcrypt_context.verify(os.getenv("USER_PASSWORD"), user.password_hash) or
+            bcrypt_context.verify(login_data.password, user.password_hash)
         )
+        if not correct_password :
+            failed_attempts[username] = failed_attempts.get(username, 0) + 1
+            if failed_attempts[username] >= FAILED_ATTEMPT_LIMIT:
+                blocked_users[username] = time() + BLOCK_TIME_SECONDS
+                failed_attempts.pop(username, None)  # Reset after blocking
+                return JSONResponse(
+                    status_code=status.HTTP_403_FORBIDDEN,
+                    content="Too many failed attempts. You are temporarily blocked."
+                )
+            return JSONResponse(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                content="Invalid credentials."
+            )
+    failed_attempts.pop(username, None)
     try:
         access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
     except Exception as e:
         print(e)
+        return JSONResponse(status_code=500, content="An error occurred during login")
 @router.get("/login", response_model=dict)
     ]
 async def register_user(db: db_dependency, create_user_request: CreateUserRequest):
     existing_user = (
         db.query(User).filter(User.email == create_user_request.email).first()
         )
+# @router.post("/forgot_password")
+# async def forget_password():
+#     pass
+# @router.post("/change_password")
+# async def change_password(
+#     user: user_dependency, db: db_dependency, user_verification: UserVerification
+# ):
+#     if user is None:
+#         return JSONResponse(status_code=401, content="Authentication Failed")
+#     user_model = db.query(User).filter(User.id == user.get("id")).first()
+#     if not bcrypt_context.verify(
+#         user_verification.password, user_model.hashed_password
+#     ):
+#         return JSONResponse(status_code=401, content="Error on password change")
+#     user_model.hashed_password = bcrypt_context.hash(user_verification.new_password)
+#     db.add(user_model)
+#     db.commit()
+#     db.refresh(user_model)
+#     return {"message": "User's password successfully changed", "user_id": user_model.id}

api/util/util.py DELETED Viewed

File without changes

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ def create_instance() -> FastAPI:
 def add_middleware(app: FastAPI) -> FastAPI:
     app.add_middleware(
         CORSMiddleware,
-        allow_origins=["https://chatbook-multimedika.vercel.app/","http://localhost:3000"],
         allow_credentials=True,
         allow_methods=["*"],
         allow_headers=["*"],
@@ -48,7 +48,7 @@ def register_routers(app: FastAPI) -> FastAPI:
     app.include_router(health.router)
     return app
-""
 def init_app() -> FastAPI:
     app: FastAPI = pipe(

 def add_middleware(app: FastAPI) -> FastAPI:
     app.add_middleware(
         CORSMiddleware,
+        allow_origins=["*"],
         allow_credentials=True,
         allow_methods=["*"],
         allow_headers=["*"],
     app.include_router(health.router)
     return app
 def init_app() -> FastAPI:
     app: FastAPI = pipe(

config.py CHANGED Viewed

@@ -30,7 +30,7 @@ class PineconeConfig(BaseSettings):
 class GPTBotConfig(BaseSettings):
     temperature : float = 0.3
     model : str = "gpt-4o-mini"
-    max_tokens : int = 512
     streaming : bool = False
     api_key : str = os.environ.get("OPENAI_API_KEY")

 class GPTBotConfig(BaseSettings):
     temperature : float = 0.3
     model : str = "gpt-4o-mini"
+    max_tokens : int = 4096
     streaming : bool = False
     api_key : str = os.environ.get("OPENAI_API_KEY")

controller/__init__.py DELETED Viewed

File without changes

controller/book_collection_controller.py DELETED Viewed

File without changes

controller/book_controller.py DELETED Viewed

File without changes

controller/bot_general_controller.py DELETED Viewed

File without changes

controller/bot_one_controller.py DELETED Viewed

File without changes

controller/bot_specific_controller.py DELETED Viewed

File without changes

controller/category_controller.py DELETED Viewed

File without changes

controller/user_controller.py DELETED Viewed

File without changes

core/book_enabler/__init__.py DELETED Viewed

File without changes

core/chat/bot_service.py CHANGED Viewed

@@ -69,7 +69,6 @@ class ChatCompletionService:
             if self.type_bot == "general":
                 response = redesign_structure_message(response, metadata_collection)
-                print(response)
             # Save the message to chat store
             self._store_message_in_chatstore(response, metadata_collection)
@@ -136,6 +135,30 @@ class ChatCompletionService:
                 print("No sources available")
         return contents, metadata_collection, scores
     def _attach_contents_to_metadata(self, contents, metadata_collection):
         for i in range(min(len(contents), len(metadata_collection))):
@@ -171,5 +194,4 @@ class ChatCompletionService:
         db = self.client["bot_database"]  # Replace with your database name
         collection = db[self.session_id]  # Replace with your collection name
-        result = collection.insert_many(chat_history_json)
-        print("Data inserted with record ids", result.inserted_ids)

             if self.type_bot == "general":
                 response = redesign_structure_message(response, metadata_collection)
             # Save the message to chat store
             self._store_message_in_chatstore(response, metadata_collection)
                 print("No sources available")
         return contents, metadata_collection, scores
+    # def _process_sources_images(self, sources, number_reference_sorted):
+    #     contents, metadata_collection, scores = [], [], []
+    #     if not number_reference_sorted:
+    #         print("There are no references")
+    #         return contents, metadata_collection, scores
+    #     for number in range (len(sources)):
+    #         number = int(number)
+    #         if sources and len(sources) > 0:
+    #             node = dict(sources[0])["raw_output"].source_nodes
+    #             if 0 <= number - 1 < len(node):
+    #                 content = node[number - 1].node.get_text()
+    #                 contents.append(content)
+    #                 metadata = dict(node[number - 1].node.metadata)
+    #                 metadata_collection.append(metadata)
+    #                 score = node[number - 1].score
+    #                 scores.append(score)
+    #             else:
+    #                 print(f"Invalid reference number: {number}")
+    #         else:
+    #             print("No sources available")
+    #     return contents, metadata_collection, scores
     def _attach_contents_to_metadata(self, contents, metadata_collection):
         for i in range(min(len(contents), len(metadata_collection))):
         db = self.client["bot_database"]  # Replace with your database name
         collection = db[self.session_id]  # Replace with your collection name
+        collection.insert_many(chat_history_json)

core/chat/bot_service_multimodal.py ADDED Viewed

	@@ -0,0 +1,169 @@

+import logging
+import re
+import os
+import pytz
+from typing import List
+from datetime import datetime
+from datetime import timedelta
+from fastapi.responses import JSONResponse
+from script.vector_db import IndexManager
+from llama_index.core.llms import MessageRole
+from core.chat.engine import Engine
+from core.chat.chatstore import ChatStore
+from core.parser import (
+    filter_metadata_by_pages,
+    extract_sorted_page_numbers
+)
+from service.dto import ChatMessage
+from pymongo.mongo_client import MongoClient
+class ChatCompletionService:
+    def __init__(
+        self,
+        session_id: str,
+        user_request: str,
+        titles: List = None,
+        type_bot: str = "general",
+    ):
+        self.session_id = session_id
+        self.user_request = user_request
+        self.titles = titles
+        self.type_bot = type_bot
+        self.client = MongoClient(os.getenv("MONGO_URI"))
+        self.engine = Engine()
+        self.index_manager = IndexManager()
+        self.chatstore = ChatStore()
+    def generate_completion(self):
+        if not self._ping_mongo():
+            return JSONResponse(
+                status_code=500, content="Database Error: Unable to connect to MongoDB"
+            )
+        try:
+            # Load and retrieve chat engine with appropriate index
+            index = self.index_manager.load_existing_indexes()
+            chat_engine = self._get_chat_engine(index)
+            # Generate chat response
+            response = chat_engine.chat(self.user_request)
+            sources = response.source_nodes
+            contents, metadata_collection, scores = self._process_sources_images(sources)
+            # Convert the response object to its plain-text form
+            response = str(response)
+            # Add contents to metadata
+            metadata_collection = self._attach_contents_to_metadata(
+                contents, metadata_collection
+            )
+            page_sources = extract_sorted_page_numbers(response)
+            metadata_collection = filter_metadata_by_pages(metadata_collection, page_sources)
+            # Save the message to chat store
+            self._store_message_in_chatstore(response, metadata_collection)
+        except Exception as e:
+            logging.error(f"An error occurred in generate text: {e}")
+            return JSONResponse(
+                status_code=500, content=f"An internal server error occurred: {e}"
+            )
+        try:
+            if self.type_bot == "specific":
+                self._save_chat_history_to_db(response, metadata_collection)
+            return str(response), metadata_collection, scores
+        except Exception as e:
+            logging.error(f"An error occurred while saving chat history: {e}")
+            return JSONResponse(
+                status_code=500,
+                content=f"An internal server error occurred while saving chat history: {e}",
+            )
+    def _ping_mongo(self):
+        try:
+            self.client.admin.command("ping")
+            print("Pinged your deployment. Successfully connected to MongoDB!")
+            return True
+        except Exception as e:
+            logging.error(f"MongoDB connection failed: {e}")
+            return False
+    def _get_chat_engine(self, index):
+        if self.type_bot == "general":
+            return self.engine.get_chat_engine(self.session_id, index)
+        return self.engine.get_chat_engine(
+            self.session_id, index, self.titles, self.type_bot
+        )
+    def _extract_sorted_references(self, response):
+        number_reference = list(set(re.findall(r"\[(\d+)\]", str(response))))
+        return sorted(number_reference)
+    def _process_sources_images(self, sources):
+        contents, metadata_collection, scores = [], [], []
+        for number in range (len(sources)):
+            if sources and len(sources) > 0:
+                content = sources[number - 1].node.get_text()
+                contents.append(content)
+                metadata = dict(sources[number - 1].node.metadata)
+                metadata_collection.append(metadata)
+                score = sources[number - 1].score
+                scores.append(score)
+            else:
+                print("No sources available")
+        return contents, metadata_collection, scores
+    def _attach_contents_to_metadata(self, contents, metadata_collection):
+        for i in range(min(len(contents), len(metadata_collection))):
+            metadata_collection[i]["content"] = contents[i]
+        return metadata_collection
+    def _store_message_in_chatstore(self, response, metadata_collection):
+        message = ChatMessage(
+            role=MessageRole.ASSISTANT,
+            content=response,
+            metadata=metadata_collection,
+        )
+        self.chatstore.delete_last_message(self.session_id)
+        self.chatstore.add_message(self.session_id, message)
+        self.chatstore.clean_message(self.session_id)
+    def _save_chat_history_to_db(self, response, metadata_collection):
+        jakarta_tz = pytz.timezone("Asia/Jakarta")
+        time_now = datetime.now(jakarta_tz)
+        user_timestamp = time_now - timedelta(seconds=0.2)
+        chat_history_db = [
+            ChatMessage(
+                role=MessageRole.USER,
+                content=self.user_request,
+                timestamp=user_timestamp,
+                payment="free" if self.type_bot == "general" else None,
+            ),
+            ChatMessage(
+                role=MessageRole.ASSISTANT,
+                content=response,
+                metadata=metadata_collection,
+                timestamp=time_now,
+                payment="free" if self.type_bot == "general" else None,
+            ),
+        ]
+        chat_history_json = [message.model_dump() for message in chat_history_db]
+        db = self.client["bot_database"]  # Replace with your database name
+        collection = db[self.session_id]  # Replace with your collection name
+        collection.insert_many(chat_history_json)

core/chat/chatstore.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import redis
 import os
 import json
 from fastapi.responses import JSONResponse
 from typing import Optional, List, Dict
 from llama_index.storage.chat_store.redis import RedisChatStore
@@ -8,44 +12,50 @@ from pymongo.mongo_client import MongoClient
 from llama_index.core.memory import ChatMemoryBuffer
 from service.dto import ChatMessage
 class ChatStore:
     def __init__(self):
         self.redis_client = redis.Redis(
-            host="redis-10365.c244.us-east-1-2.ec2.redns.redis-cloud.com",
-            port=10365,
-            password=os.environ.get("REDIS_PASSWORD"),
         )
         uri = os.getenv("MONGO_URI")
         self.client = MongoClient(uri)
     def initialize_memory_bot(self, session_id):
         chat_store = RedisChatStore(
             redis_client=self.redis_client, ttl=86400  # Time-to-live set for 24 hours (86400 seconds)
         )
         db = self.client["bot_database"]
-        if (
-            self.redis_client.exists(session_id)
-            or session_id in db.list_collection_names()
-        ):
-            if session_id not in self.redis_client.keys():
-                self.add_chat_history_to_redis(
-                    session_id
-                )  # Add chat history to Redis if not found
-            # Create memory buffer with chat store and session key
             memory = ChatMemoryBuffer.from_defaults(
                 token_limit=3000, chat_store=chat_store, chat_store_key=session_id
             )
         else:
-            # Handle the case where the session doesn't exist
             memory = ChatMemoryBuffer.from_defaults(
                 token_limit=3000, chat_store=chat_store, chat_store_key=session_id
             )
         return memory
     def get_messages(self, session_id: str) -> List[dict]:
@@ -56,14 +66,14 @@ class ChatStore:
         # Decode and parse each item into a dictionary
         return [json.loads(m.decode("utf-8")) for m in items]
     def get_last_message(self, session_id: str) -> Optional[Dict]:
         """Get the last message for a session_id."""
         last_message = self.redis_client.lindex(session_id, -1)
         if last_message is None:
             return None  # Return None if there are no messages
         # Decode and parse the last message into a dictionary
         return json.loads(last_message.decode("utf-8"))
@@ -73,11 +83,13 @@ class ChatStore:
         # Get the last document by sorting by _id in descending order
         last_document = collection.find().sort("_id", -1).limit(1)
         for doc in last_document:
-            doc["content"]
-        return str(doc["content"])
     def delete_last_message(self, session_id: str) -> Optional[ChatMessage]:
         """Delete last message for a session_id."""
@@ -113,21 +125,23 @@ class ChatStore:
     def get_keys(self) -> List[str]:
         """Get all keys."""
         try:
-            print(self.redis_client.keys("*"))
             return [key.decode("utf-8") for key in self.redis_client.keys("*")]
         except Exception as e:
-            # Log the error and return JSONResponse for FastAPI
-            print(f"An error occurred in update data.: {e}")
             return JSONResponse(status_code=400, content="the error when get keys")
-    def add_message(self, session_id: str, message: ChatMessage) -> None:
         """Add a message for a session_id."""
         item = json.dumps(self._message_to_dict(message))
         self.redis_client.rpush(session_id, item)
-    def _message_to_dict(self, message: ChatMessage) -> dict:
-        return message.model_dump()
     def add_chat_history_to_redis(self, session_id: str) -> None:
         """Fetch chat history from MongoDB and add it to Redis."""
@@ -169,18 +183,11 @@ class ChatStore:
             # Convert the cursor to a list and exclude the _id field
             documents_list = [
-                {key: doc[key] for key in doc if key != "_id" and doc[key] is not None}
                 for doc in documents
             ]
-            # Print the list of documents without the _id field
-            print(documents_list)  # Optional: If you want to see the output
             return documents_list
         except Exception as e:
-            print(f"An error occurred while retrieving messages: {e}")
-            return JSONResponse(
-                status_code=500,
-                content=f"An error occurred while retrieving messages: {e}",
-            )

 import redis
 import os
 import json
+from datetime import datetime
+from dotenv import load_dotenv
 from fastapi.responses import JSONResponse
 from typing import Optional, List, Dict
 from llama_index.storage.chat_store.redis import RedisChatStore
 from llama_index.core.memory import ChatMemoryBuffer
 from service.dto import ChatMessage
+load_dotenv()
 class ChatStore:
     def __init__(self):
         self.redis_client = redis.Redis(
+            # host="redis-10365.c244.us-east-1-2.ec2.redns.redis-cloud.com",
+            host = os.getenv("REDIS_HOST"),
+            port=os.getenv("REDIS_PORT"),
+            username = os.getenv("REDIS_USERNAME"),
+            password=os.getenv("REDIS_PASSWORD"),
         )
         uri = os.getenv("MONGO_URI")
         self.client = MongoClient(uri)
     def initialize_memory_bot(self, session_id):
+        # Decode Redis keys to work with strings
+        redis_keys = [key.decode('utf-8') for key in self.redis_client.keys()]
         chat_store = RedisChatStore(
             redis_client=self.redis_client, ttl=86400  # Time-to-live set for 24 hours (86400 seconds)
         )
         db = self.client["bot_database"]
+        # Check if the session exists in Redis or MongoDB
+        if session_id in redis_keys:
+            # If the session already exists in Redis, create the memory buffer using Redis
+            memory = ChatMemoryBuffer.from_defaults(
+                token_limit=3000, chat_store=chat_store, chat_store_key=session_id
+            )
+        elif session_id in db.list_collection_names():
+            # If the session exists in MongoDB but not Redis, fetch messages from MongoDB
+            self.add_chat_history_to_redis(session_id)  # Add chat history to Redis
+            # Then create the memory buffer using Redis
             memory = ChatMemoryBuffer.from_defaults(
                 token_limit=3000, chat_store=chat_store, chat_store_key=session_id
             )
         else:
+            # If the session doesn't exist in either Redis or MongoDB, create an empty memory buffer
             memory = ChatMemoryBuffer.from_defaults(
                 token_limit=3000, chat_store=chat_store, chat_store_key=session_id
             )
         return memory
     def get_messages(self, session_id: str) -> List[dict]:
         # Decode and parse each item into a dictionary
         return [json.loads(m.decode("utf-8")) for m in items]
     def get_last_message(self, session_id: str) -> Optional[Dict]:
         """Get the last message for a session_id."""
         last_message = self.redis_client.lindex(session_id, -1)
         if last_message is None:
             return None  # Return None if there are no messages
         # Decode and parse the last message into a dictionary
         return json.loads(last_message.decode("utf-8"))
         # Get the last document by sorting by _id in descending order
         last_document = collection.find().sort("_id", -1).limit(1)
+        # Iterate over last_document and return its content if present
         for doc in last_document:
+            return str(doc.get('content', ""))  # return the content, or an empty string if the key is missing
+        # If there are no documents, return an empty string
+        return ""
     def delete_last_message(self, session_id: str) -> Optional[ChatMessage]:
         """Delete last message for a session_id."""
     def get_keys(self) -> List[str]:
         """Get all keys."""
         try:
             return [key.decode("utf-8") for key in self.redis_client.keys("*")]
         except Exception as e:
             return JSONResponse(status_code=400, content="the error when get keys")
+    def add_message(self, session_id: str, message: Optional[ChatMessage]) -> None:
         """Add a message for a session_id."""
         item = json.dumps(self._message_to_dict(message))
         self.redis_client.rpush(session_id, item)
+    def _message_to_dict(self, message: Optional[ChatMessage]) -> dict:
+        # Convert the ChatMessage instance into a dictionary with necessary adjustments
+        message_dict = message.model_dump()
+        # Convert any datetime fields to ISO format, if needed
+        if isinstance(message_dict.get('timestamp'), datetime):
+            message_dict['timestamp'] = message_dict['timestamp'].isoformat()
+        return message_dict
     def add_chat_history_to_redis(self, session_id: str) -> None:
         """Fetch chat history from MongoDB and add it to Redis."""
             # Convert the cursor to a list and exclude the _id field
             documents_list = [
+                {key: doc[key] for key in doc if key !="_id" and doc[key] is not None}
                 for doc in documents
             ]
             return documents_list
         except Exception as e:
+            return JSONResponse(status_code=500, content=f"An error occurred while retrieving messages: {e}")

core/chat/engine.py CHANGED Viewed

@@ -8,11 +8,14 @@ from llama_index.core.tools import QueryEngineTool, ToolMetadata
 from llama_index.agent.openai import OpenAIAgent
 from llama_index.llms.openai import OpenAI
 from llama_index.core.query_engine import CitationQueryEngine
 from llama_index.core import Settings
 from core.chat.chatstore import ChatStore
 from config import GPTBOT_CONFIG
-from core.prompt import SYSTEM_BOT_TEMPLATE, ADDITIONAL_INFORMATIONS
 from core.parser import join_list
@@ -27,8 +30,11 @@ class Engine:
         self.chat_store = ChatStore()
         Settings.llm = self.llm
     def get_citation_engine(self, titles:List, index):
         filters = [
             MetadataFilter(
                 key="title",
@@ -41,41 +47,66 @@ class Engine:
         filters = MetadataFilters(filters=filters, condition="or")
         # Create the QueryEngineTool with the index and filters
-        kwargs = {"similarity_top_k": 5, "filters": filters}
         retriever = index.as_retriever(**kwargs)
         # citation_engine = CitationQueryEngine(retriever=retriever)
-        return CitationQueryEngine.from_args(index, retriever=retriever)
     def get_chat_engine(self, session_id, index, titles=None, type_bot="general"):
         # Create the QueryEngineTool based on the type
         if type_bot == "general":
             # query_engine = index.as_query_engine(similarity_top_k=3)
-            citation_engine = CitationQueryEngine.from_args(index, similarity_top_k=5)
-            description = "A book containing information about medicine"
         else:
             citation_engine = self.get_citation_engine(titles, index)
-            description = "A book containing information about medicine"
-        metadata = ToolMetadata(name="bot-belajar", description=description)
-        print(metadata)
-        vector_query_engine = QueryEngineTool(
-            query_engine=citation_engine, metadata=metadata
         )
-        print(vector_query_engine)
         # Initialize the OpenAI agent with the tools
         if type_bot == "general":
-            system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information="")
         else:
             additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
-            system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information=additional_information)
         chat_engine = OpenAIAgent.from_tools(
-            tools=[vector_query_engine],
             llm=self.llm,
             memory=self.chat_store.initialize_memory_bot(session_id),
             system_prompt=system_prompt,

 from llama_index.agent.openai import OpenAIAgent
 from llama_index.llms.openai import OpenAI
 from llama_index.core.query_engine import CitationQueryEngine
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.multi_modal_llms.openai import OpenAIMultiModal
 from llama_index.core import Settings
 from core.chat.chatstore import ChatStore
+from core.multimodal import MultimodalQueryEngine
 from config import GPTBOT_CONFIG
+from core.prompt import SYSTEM_BOT_TEMPLATE, ADDITIONAL_INFORMATIONS,SYSTEM_BOT_GENERAL_TEMPLATE, SYSTEM_BOT_IMAGE_TEMPLATE
 from core.parser import join_list
         self.chat_store = ChatStore()
         Settings.llm = self.llm
+        embed_model = OpenAIEmbedding(model="text-embedding-3-large")
+        Settings.embed_model = embed_model
     def get_citation_engine(self, titles:List, index):
+        model_multimodal = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
         filters = [
             MetadataFilter(
                 key="title",
         filters = MetadataFilters(filters=filters, condition="or")
         # Create the QueryEngineTool with the index and filters
+        kwargs = {"similarity_top_k": 10, "filters": filters}
         retriever = index.as_retriever(**kwargs)
         # citation_engine = CitationQueryEngine(retriever=retriever)
+        # return CitationQueryEngine.from_args(index, retriever=retriever)
+        return MultimodalQueryEngine(retriever=retriever, multi_modal_llm=model_multimodal)
     def get_chat_engine(self, session_id, index, titles=None, type_bot="general"):
         # Create the QueryEngineTool based on the type
         if type_bot == "general":
             # query_engine = index.as_query_engine(similarity_top_k=3)
+            # citation_engine = CitationQueryEngine.from_args(index, similarity_top_k=5)
+            model_multimodal = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
+            retriever = index.as_retriever(similarity_top_k=10)
+            citation_engine = MultimodalQueryEngine(retriever=retriever, multi_modal_llm=model_multimodal)
+            # description = "A book containing information about medicine"
         else:
             citation_engine = self.get_citation_engine(titles, index)
+            # description = "A book containing information about medicine"
+        # metadata = ToolMetadata(name="bot-belajar", description=description)
+        # vector_query_engine = QueryEngineTool(
+        #     query_engine=citation_engine, metadata=metadata
+        # )
+        vector_tool = QueryEngineTool.from_defaults(
+            query_engine=citation_engine,
+            name="vector_tool",
+            description=(
+                "Useful for retrieving specific context from the data from a book containing information about medicine"
+            ),
         )
         # Initialize the OpenAI agent with the tools
+        # if type_bot == "general":
+        #     system_prompt = SYSTEM_BOT_GENERAL_TEMPLATE
+        # else:
+        #     additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
+        #     system_prompt = SYSTEM_BOT_TEMPLATE.format(additional_information=additional_information)
+        # chat_engine = OpenAIAgent.from_tools(
+        #     tools=[vector_query_engine],
+        #     llm=self.llm,
+        #     memory=self.chat_store.initialize_memory_bot(session_id),
+        #     system_prompt=system_prompt,
+        # )
         if type_bot == "general":
+            system_prompt = SYSTEM_BOT_IMAGE_TEMPLATE
         else:
             additional_information = ADDITIONAL_INFORMATIONS.format(titles=join_list(titles))
+            system_prompt = SYSTEM_BOT_IMAGE_TEMPLATE.format(additional_information=additional_information)
         chat_engine = OpenAIAgent.from_tools(
+            tools=[vector_tool],
             llm=self.llm,
             memory=self.chat_store.initialize_memory_bot(session_id),
             system_prompt=system_prompt,

core/chat/messaging.py DELETED Viewed

@@ -1,63 +0,0 @@
-# Experimental
-from typing import Dict, Any, Optional, List
-import asyncio
-import logging
-from uuid import uuid4
-from anyio import ClosedResourceError
-from anyio.streams.memory import MemoryObjectSendStream
-from llama_index.core.callbacks.base import BaseCallbackHandler, CallbackManager
-from llama_index.core.callbacks import CBEventType, EventPayload
-from llama_index.core.query_engine.sub_question_query_engine import (
-    SubQuestionAnswerPair,
-)
-from llama_index.core.chat_engine.types import StreamingAgentChatResponse
-from pydantic import BaseModel
-from core.chat import schema
-from db.db import MessageSubProcessSourceEnum
-from core.chat.schema import SubProcessMetadataKeysEnum, SubProcessMetadataMap
-from core.chat.engine import Engine
-from script.vector_db import IndexManager
-from service.dto import UserPromptRequest
-logger = logging.getLogger(__name__)
-class StreamedMessage(BaseModel):
-    content: str
-async def handle_chat_message(
-    user_message: str,
-    send_chan: MemoryObjectSendStream,
-) -> None:
-    async with send_chan:
-        engine = Engine()
-        index_manager = IndexManager()
-        index = index_manager.load_existing_indexes()
-        # Retrieve the chat engine with the loaded index
-        chat_engine = await engine.get_chat_engine(index)
-        logger.debug("Engine received")
-        streaming_chat_response: StreamingAgentChatResponse = (
-            await chat_engine.astream_chat(user_message)
-        )
-        response_str = ""
-        async for text in streaming_chat_response.async_response_gen():
-            response_str += text
-            if send_chan._closed:
-                logger.debug(
-                    "Received streamed token after send channel closed. Ignoring."
-                )
-                return
-            await send_chan.send(StreamedMessage(content=response_str))
-        if response_str.strip() == "":
-            await send_chan.send(
-                StreamedMessage(
-                    content="Sorry, I either wasn't able to understand your question or I don't have an answer for it."
-                )
-            )

core/chat/schema.py DELETED Viewed

@@ -1,162 +0,0 @@
-# Experimental
-from pydantic import BaseModel, Field, field_validator
-from typing import List, Optional, Dict, Union, Any
-from enum import Enum
-from uuid import UUID
-from datetime import datetime
-from llama_index.core.schema import BaseNode, NodeWithScore
-from llama_index.core.callbacks.schema import EventPayload
-from llama_index.core.query_engine.sub_question_query_engine import SubQuestionAnswerPair
-from db.db import (
-    MessageRoleEnum,
-    MessageStatusEnum,
-    MessageSubProcessSourceEnum,
-    MessageSubProcessStatusEnum,
-)
-DB_DOC_ID_KEY = "db_document_id"
-class Base(BaseModel):
-    id: Optional[UUID] = Field(None, description="Unique identifier")
-    created_at: Optional[datetime] = Field(None, description="Creation datetime")
-    updated_at: Optional[datetime] = Field(None, description="Update datetime")
-    class Config:
-        orm_mode = True
-class BaseMetadataObject(BaseModel):
-    class Config:
-        orm_mode = True
-class Citation(BaseMetadataObject):
-    document_id: UUID
-    text: str
-    page_number: int
-    score: Optional[float]
-    @field_validator("document_id")
-    def validate_document_id(cls, value):
-        if value:
-            return str(value)
-        return value
-    @classmethod
-    def from_node(cls, node_w_score: NodeWithScore) -> "Citation":
-        node: BaseNode = node_w_score.node
-        page_number = int(node.source_node.metadata["page_label"])
-        document_id = node.source_node.metadata[""]
-        return cls(
-            document_id=document_id,
-            text=node.get_content(),
-            page_number=page_number,
-            score=node_w_score.score,
-        )
-class QuestionAnswerPair(BaseMetadataObject):
-    """
-    A question-answer pair that is used to store the sub-questions and answers
-    """
-    question: str
-    answer: Optional[str]
-    citations: Optional[List[Citation]] = None
-    @classmethod
-    def from_sub_question_answer_pair(
-        cls, sub_question_answer_pair: SubQuestionAnswerPair
-    ):
-        if sub_question_answer_pair.sources is None:
-            citations = None
-        else:
-            citations = [
-                Citation.from_node(node_w_score)
-                for node_w_score in sub_question_answer_pair.sources
-                if node_w_score.node.source_node is not None
-                and DB_DOC_ID_KEY in node_w_score.node.source_node.metadata
-            ]
-        citations = citations or None
-        return cls(
-            question=sub_question_answer_pair.sub_q.sub_question,
-            answer=sub_question_answer_pair.answer,
-            citations=citations,
-        )
-# later will be Union[QuestionAnswerPair, more to add later... ]
-class SubProcessMetadataKeysEnum(str, Enum):
-    SUB_QUESTION = EventPayload.SUB_QUESTION.value
-# keeping the typing pretty loose here, in case there are changes to the metadata data formats.
-SubProcessMetadataMap = Dict[Union[SubProcessMetadataKeysEnum, str], Any]
-class MessageSubProcess(Base):
-    message_id: UUID
-    source: MessageSubProcessSourceEnum
-    status: MessageSubProcessStatusEnum
-    metadata_map: Optional[SubProcessMetadataMap]
-class Message(Base):
-    conversation_id: UUID
-    content: str
-    role: MessageRoleEnum
-    status: MessageStatusEnum
-    sub_processes: List[MessageSubProcess]
-class UserMessageCreate(BaseModel):
-    content: str
-class DocumentMetadataKeysEnum(str, Enum):
-    """
-    Enum for the keys of the metadata map for a document
-    """
-    SEC_DOCUMENT = "sec_document"
-class SecDocumentTypeEnum(str, Enum):
-    """
-    Enum for the type of sec document
-    """
-    TEN_K = "10-K"
-    TEN_Q = "10-Q"
-class SecDocumentMetadata(BaseModel):
-    """
-    Metadata for a document that is a sec document
-    """
-    company_name: str
-    company_ticker: str
-    doc_type: SecDocumentTypeEnum
-    year: int
-    quarter: Optional[int]
-    accession_number: Optional[str]
-    cik: Optional[str]
-    period_of_report_date: Optional[datetime]
-    filed_as_of_date: Optional[datetime]
-    date_as_of_change: Optional[datetime]
-DocumentMetadataMap = Dict[Union[DocumentMetadataKeysEnum, str], Any]
-class Document(Base):
-    url: str
-    metadata_map: Optional[DocumentMetadataMap] = None
-class Conversation(Base):
-    messages: List[Message]
-    documents: List[Document]
-class ConversationCreate(BaseModel):
-    document_ids: List[UUID]

core/module_creator/__init__.py DELETED Viewed

File without changes

core/multimodal.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from llama_index.core.query_engine import CustomQueryEngine
+from llama_index.core.retrievers import BaseRetriever
+from llama_index.multi_modal_llms.openai import OpenAIMultiModal
+from llama_index.core.schema import ImageNode, NodeWithScore, MetadataMode
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.base.response.schema import Response
+from typing import Optional
+from core.prompt import MULTOMODAL_QUERY_TEMPLATE
+gpt_4o = OpenAIMultiModal(model="gpt-4o-mini", max_new_tokens=4096)
+QA_PROMPT = PromptTemplate(MULTOMODAL_QUERY_TEMPLATE)
+class MultimodalQueryEngine(CustomQueryEngine):
+    """Query engine that answers from retrieved text nodes plus their linked images.
+    The retriever supplies the text nodes. Every image URL found under a node's
+    ``image_link`` metadata entry is wrapped in an ``ImageNode`` and handed to the
+    multimodal LLM together with the formatted prompt.
+    """
+    # Prompt template; formatted with ``context_str`` and ``query_str``.
+    qa_prompt: PromptTemplate
+    # Retriever that is queried with the raw query string.
+    retriever: BaseRetriever
+    # Multimodal model that receives the prompt and the image documents.
+    multi_modal_llm: OpenAIMultiModal
+    def __init__(self, qa_prompt: Optional[PromptTemplate] = None, **kwargs) -> None:
+        """Create the engine, falling back to ``QA_PROMPT`` when no prompt is given."""
+        prompt = qa_prompt or QA_PROMPT
+        super().__init__(qa_prompt=prompt, **kwargs)
+    def custom_query(self, query_str: str):
+        """Retrieve context for ``query_str`` and synthesize a ``Response``.
+        Args:
+            query_str: The user query passed to the retriever and the prompt.
+        Returns:
+            A ``Response`` whose text is the multimodal LLM output, whose
+            ``source_nodes`` are the retrieved nodes, and whose metadata holds
+            both the text nodes and the derived image nodes.
+        """
+        retrieved = self.retriever.retrieve(query_str)
+        # Collect one ImageNode per non-empty image link found in node metadata.
+        image_nodes = []
+        for text_node in retrieved:
+            if "image_link" not in text_node.metadata:
+                continue
+            raw_links = text_node.metadata["image_link"]
+            if raw_links in ["", []]:
+                continue
+            # The metadata entry may be a single link or a list of links.
+            link_list = raw_links if isinstance(raw_links, list) else [raw_links]
+            for link in link_list:
+                if link in ["", []]:
+                    continue
+                image_nodes.append(NodeWithScore(node=ImageNode(image_url=link)))
+        print("image_nodes: {}".format(image_nodes))
+        # Join the LLM-facing content of every text node into the prompt context.
+        context_chunks = [
+            text_node.get_content(metadata_mode=MetadataMode.LLM)
+            for text_node in retrieved
+        ]
+        context_str = "\n\n".join(context_chunks)
+        fmt_prompt = self.qa_prompt.format(context_str=context_str, query_str=query_str)
+        # Ask the multimodal model to answer from the prompt text and the images.
+        image_documents = [scored.node for scored in image_nodes]
+        llm_response = self.multi_modal_llm.complete(
+            prompt=fmt_prompt,
+            image_documents=image_documents,
+        )
+        return Response(
+            response=str(llm_response),
+            source_nodes=retrieved,
+            metadata={"text_nodes": retrieved, "image_nodes": image_nodes},
+        )

core/parser.py CHANGED Viewed

@@ -64,7 +64,6 @@ def update_response(text):
     return text
 def renumber_sources(source_list):
     new_sources = []
     for i, source in enumerate(source_list):
@@ -74,7 +73,6 @@ def renumber_sources(source_list):
         new_sources.append(f"source {i+1}: {content}")
     return new_sources
 def sort_and_renumber_sources(source_list):
     """
     This function takes a list of sources, sorts them based on the source number,
@@ -98,6 +96,7 @@ def sort_and_renumber_sources(source_list):
     return sorted_sources
 def seperate_to_list(text):
     # Step 1: Split the text by line breaks (\n)
     lines = text.split("\n")
@@ -123,7 +122,7 @@ def join_list(items):
         return f"{items[0]} and {items[1]}"
     else:
         return ", ".join(items[:-1]) + " and " + items[-1]
 def redesign_structure_message(message, metadata):
     """
     This function replaces occurrences of '[n]' in the message
@@ -143,4 +142,31 @@ def redesign_structure_message(message, metadata):
     # Use regex to find all citations in the format '[n]'
     redesigned_message = re.sub(r'\[(\d+)\]', replace_citation, message)
-    return redesigned_message

     return text
 def renumber_sources(source_list):
     new_sources = []
     for i, source in enumerate(source_list):
         new_sources.append(f"source {i+1}: {content}")
     return new_sources
 def sort_and_renumber_sources(source_list):
     """
     This function takes a list of sources, sorts them based on the source number,
     return sorted_sources
 def seperate_to_list(text):
     # Step 1: Split the text by line breaks (\n)
     lines = text.split("\n")
         return f"{items[0]} and {items[1]}"
     else:
         return ", ".join(items[:-1]) + " and " + items[-1]
 def redesign_structure_message(message, metadata):
     """
     This function replaces occurrences of '[n]' in the message
     # Use regex to find all citations in the format '[n]'
     redesigned_message = re.sub(r'\[(\d+)\]', replace_citation, message)
+    return redesigned_message
+def extract_sorted_page_numbers(content):
+    """Return the distinct page numbers cited in ``content``, in ascending order.
+    A citation is a bracketed ``p-<digits>`` marker. Both the single form
+    ``[p-166]`` and the grouped form ``[p-10, p-11]`` are recognized, so a
+    bracket that lists several pages is not silently dropped.
+    Args:
+        content: Text that may contain page citations.
+    Returns:
+        A sorted list of unique page numbers as integers; empty when there
+        are no citations.
+    """
+    # One bracket may carry several comma-separated page markers.
+    bracket_pattern = r'\[(p-\d+(?:\s*,\s*p-\d+)*)\]'
+    page_numbers = set()
+    for group in re.findall(bracket_pattern, content):
+        # Pull every page number out of the matched bracket body.
+        page_numbers.update(int(num) for num in re.findall(r'p-(\d+)', group))
+    return sorted(page_numbers)
+# Collapse per-page metadata into one record that carries all cited pages.
+def filter_metadata_by_pages(metadata, pages):
+    """Build a single combined metadata record for the cited pages.
+    The bibliographic fields are copied from the first metadata entry (the
+    entries are treated as sharing the same book details) and ``page_number``
+    is set to ``pages`` exactly as given.
+    Args:
+        metadata: Sequence of dicts holding ``title``, ``author``, ``category``,
+            ``year``, ``publisher`` and ``reference``.
+        pages: Page numbers to attach, e.g. ``[163, 165, 166]``.
+    Returns:
+        A one-element list with the combined record, or ``[]`` when either
+        argument is empty or ``None``.
+    Raises:
+        KeyError: If the first metadata entry lacks one of the copied fields.
+    """
+    # Nothing to combine without both pages and at least one metadata entry.
+    if not (pages and metadata):
+        return []
+    first_entry = metadata[0]
+    shared_fields = ("title", "author", "category", "year", "publisher", "reference")
+    record = {"page_number": pages}
+    record.update((field, first_entry[field]) for field in shared_fields)
+    return [record]

core/prompt.py CHANGED Viewed

@@ -1,5 +1,38 @@
 SYSTEM_BOT_TEMPLATE = """
-Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu hanya memberikan informasi dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi dari apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli.
 **Instruksi**:
@@ -14,6 +47,121 @@ Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis
  5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
 """
 ADDITIONAL_INFORMATIONS = """
 Kemudian, kamu menjawab pertanyan user dari buku {titles}, jadi jika user bertaya kamu pastikan akan mengacu buku tersebut yang didapatkan dari tools dari yang kamu punya.
 """
@@ -49,6 +197,7 @@ Kamu juga harus memperhatikan instruksi :
  - "Dapatkan buku ini sekarang dan tingkatkan pemahaman Anda tentang kesehatan 😊"
 """
 SYSTEM_TOPIC_TEMPLATE = """
 You are tasked with analyzing a table of contents from a book. Your goal is to identify and extract the main topics and subtopics. Please provide a clear and organized list of these topics and subtopics. The list should reflect the structure and hierarchy presented in the table of contents.
 """
@@ -147,7 +296,6 @@ Your task is to extract and organize metadata for the {class_name}. Follow the i
    - **How:** Structure the entries clearly and precisely as attributes of the class.
    - **Tip:** Use precise language to capture the relationship between the main topic and subtopic, ensuring clarity and ease of reference for future use.
 """
 SUMMARIZER_SYSTEM_TEMPLATE = """
 """

+PARSER_INSTRUCTION = """
+You are a highly proficient language model designed to convert pages from PDF, PPT and other files into structured markdown text. Your goal is to accurately transcribe text and identify and describe images, particularly graphs and other graphical elements.
+You have been tasked with creating a markdown copy of each page from the provided PDF or PPT image. You should write the number of the figure, and keep it in your markdown text. Each image description must include a full description of the content, a summary of the graphical object.
+Maintain the sequence of all the elements.
+For the following element, follow the requirement of extraction:
+for Text:
+   - Extract all readable text from the page.
+   - Exclude any diagonal text, headers, and footers.
+for Text which includes hyperlink:
+    -Extract hyperlink and present it with the text
+for Image Identification and Description:
+   - Identify all images, graphs, and other graphical elements on the page.
+   - For each image or graph, note the figure number and include it in the description as "Figure X" where X is the figure number.
+   - If the image has graph , extract the graph as image . DO NOT convert it into a table or extract the wording inside the graph.
+   - If image contains wording that is hard to extract , flag it with <unidentifiable section> instead of parsing.
+   - If the image has a subtitle or caption, include it in the description.
+   - If the image has a organisation chart , convert it into a hierachical understandable format.
+   - If the image contain process flow , capture it as a whole image instead of separate into blocks of images.
+for Table:
+   - Try to retain the columns and structure of the table and extract it into markdown format.
+# OUTPUT INSTRUCTIONS
+- Exclude any diagonal text, headers, and footers from the output.
+- For each image and graph, provide a detailed description,caption if there's any and summary. Clearly denote the figure number for each image in the format "Figure X" if it is noticed in the context.
+"""
 SYSTEM_BOT_TEMPLATE = """
+Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu hanya memberikan informasi dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. Jika bertanya tentang rangkuman, cukup rangkum apa yang kamu tahu pada konteks yang kamu miliki. Jika bertanya tentang daftar isi, pastikan kamu melihatnya dari teks pendahuluan atau content tablenya. {additional_information} Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi dari apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli.
 **Instruksi**:
  5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
 """
+SYSTEM_BOT_IMAGE_TEMPLATE = """
+Kamu adalah Medbot yang selalu menggunakan tools untuk menjawab pertanyaan medis. Jika pengguna bertanya tentang topik non-medis, arahkan mereka untuk bertanya di bidang medis. Tugasmu adalah memberikan jawaban yang informatif dan akurat berdasarkan tools yang tersedia. Pastikan kamu menggunakan tool, jangan mencoba coba untuk menerka jawaban dari pengetahuanmu kemudian pastikan kamu hanya memberikan informasi dari tool dari buku yang telah disediakan, jangan sampai menjawab pertanyaan yang tidak terdapat dalam buku atau tools yang kamu gunakan. {additional_information}  Jika setelah itu tidak ada informasi yang ditemukan, katakan bahwa kamu tidak mengetahuinya dan berikan informasi secara singkat apa yang kamu ketahui kemudian arahkan pengguna untuk bertanya ke dokter yang lebih ahli. Selalu cantumkan citation halamannya dari konteks yang kamu ambil berdasarkan format yang ada. Yaitu menggunakan [p-no.halaman] di akhir kutipan yang kamu ambil.
+**Instruksi**:
+ 1. **Jawaban Berdasarkan Tools**: Jika pengguna bertanya tentang topik kedokteran, gunakanlah tools yang tersedia untuk memberikan jawaban. Pastikan jawabanmu relevan dan sesuai dengan informasi dari tools tersebut. Jelaskan informasi dengan jelas dan lengkap. Jika ada tabel, boleh anda tampilkan tabel nya untuk menyampaikan data data yang jelas berdasarkan konteks buku.
+ 2. **Referensi dan Kutipan**:
+  -  Jika di konteks terdapat gambar maka anda perlu menampilkan gambar dengan cantumkan link gambarnya yang ditampilkan di markdown**, serta tampilkan di halaman berapa gambar tersebut diambil contoh :
+    * Based on the images: <br>
+![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/test/img_p9_1.png) --> kamu tetap harus menampilkan tampilan gambar yang di markdown sehingga menjadi :
+    Pada gambar :\n
+    ![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/test/img_p9_1.png)
+    \nsumber : [p-10]
+    Jika memang tidak ada gambar pada konteks yang dibutuhkan oleh pertanyaan, anda cukup tambahkan bahwa anda tidak memiliki gambar pada konteks pertanyaan. Pastikan gambarnya sesuai dengan apa yang ditanyakan, tidak membuat gambar yang tidak sesuai dengan konteks.
+  - Jangan menghapus sumber kutipan baik berupa citasi atau halaman (page number) dari teks yang diberikan. Contohnya, jika teksnya adalah "Ilmu kedokteran sangat dibutuhkan [p-2]", pastikan untuk menyertakan kutipan sumbernya yaitu [p-2] dalam jawabanmu. Contoh lain: :
+     *  Water is wet when the sky is red [p-11]. --> kamu harus tetap cantumkan [p-11] pada jawaban yang kamu generate
+     *  source :
+        page_number : 12
+        The sky is red in the evening and blue in the morning. --> Kamu harus mengubahnya menjadi sesuai dengan page numbernya atau citationnya sehingga menjadi :
+        The sky is red in the evening and blue in the morning [p-12].
+ 3. **Ketika Tidak Tahu Jawaban**: Jika pertanyaan pengguna tidak dapat dijawab dengan menggunakan tools ini, sampaikan dengan sopan bahwa kamu tidak memiliki jawaban untuk pertanyaan tersebut. Arahkan pengguna untuk mencari informasi lebih lanjut atau bertanya pada ahli di bidang kedokteran.
+ 4. **Gaya Jawaban**: Berikan jawaban dengan gaya yang ramah dan profesional. Sampaikan informasi secara naratif agar lebih mudah dipahami. Boleh menggunakan point point dan uraiannya agar bisa menjelaskan informasi yang kompleks sehingga mudah dipahami. Gunakan kata 'dok' atau 'dokter' untuk merujuk pada dokter, dan hindari kesan monoton dengan menambahkan emotikon jika sesuai seperti 😁, 😊, 🙌, 😉, 😀, 🤔, 😇.
+ 5. **Penutup**: Akhiri komunikasi dengan kalimat yang friendly, seperti "Semoga informasi ini bermanfaat, dok ✨" atau "Jika ada pertanyaan lain, jangan ragu untuk bertanya ya dok 😊" dan sebagainya.
+"""
+# Few-shot prompt for the multimodal query engine. It is formatted with
+# ``context_str`` (retrieved sources) and ``query_str`` (the user question).
+# Every citation in the examples uses the single-page ``[p-N]`` form so the
+# examples agree with the format stated in instruction 3.
+MULTOMODAL_QUERY_TEMPLATE = """\
+Below is parsed text from books, available in two formats: 'markdown' (which organizes relevant diagrams as tables) and 'raw text' (preserving the rough spatial layout of the original text). Additionally, image references from the book are provided.
+### Instructions:
+1. **Use image information as the primary source**: Reference the **image URL** to explain your answer, if possible.
+2. **Only use parsed text** (markdown or raw) **if the image does not provide a clear answer**.
+3. **Always cite the page number** for any information referenced. Please give the page number after the text that you cited, the format is : [p-no.page]
+4. **Provide the image inline in the answer** by linking directly to the AWS S3 image URL provided for easy viewing.
+5. Ensure that the AWS link represented by [title] matches the book's title and that the link or URL is provided in the context. Never include a link or URL that is not present in the context. If no link is available, simply state: "Apologies, the image or content you are referring to is not available in this context."
+### Example:
+**Sources Provided:**
+**Source 1:**
+- Page number: 10
+- Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/[title]/img_p9_1.png`
+- contoh judul : blue sky, so that the link should be :
+- Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p9_1.png`
+- Text: "The sky is red in the evening and blue in the morning. [p-10]"
+**Source 2:**
+- Page number: 11
+- Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/[title]/img_p10_1.png`
+- contoh judul : blue sky, so that the link should be :
+- Image URL: `https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p10_1.png`
+- Text: "Water is wet when the sky is red. [p-11]"
+**Query:** When is water wet?
+**Answer:**
+Based on the images:\n
+![figure-10](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p9_1.png)\n
+[p-10]
+and \n
+![figure-11](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/blue+sky/img_p10_1.png)\n,
+[p-11]\n
+water is wet when the sky is red in the evening [p-10] [p-11].
+**Sources Provided:**
+Source 1:
+Page number: 15
+Image URL: https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p14_1.png
+Text: "Plants grow best in blue light but struggle in red light."
+Source 2:
+Page number: 16
+Image URL: https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p15_1.png
+Text: "Optimal light conditions for plant growth are illustrated in Figure 16."
+Query:
+What color of light is best for plant growth?
+Answer:
+When we look in the image :\n
+![figure-15](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p14_1.png)
+\n[p-15]
+Plants grow best under blue light, as shown in the color-coded illustration in the image [p-15].
+And the optimal light condition will be shown it the figure : \n
+![figure-16](https://book-images-multimedika.s3.us-west-2.amazonaws.com/images/green+plant/img_p15_1.png)
+\n[p-16]
+---
+**Now, please answer the following query based on the sources provided:**
+---
+**Sources:**
+{context_str}
+**Query:**
+{query_str}
+**Answer:**
+"""
 ADDITIONAL_INFORMATIONS = """
 Kemudian, kamu menjawab pertanyan user dari buku {titles}, jadi jika user bertaya kamu pastikan akan mengacu buku tersebut yang didapatkan dari tools dari yang kamu punya.
 """
  - "Dapatkan buku ini sekarang dan tingkatkan pemahaman Anda tentang kesehatan 😊"
 """
 SYSTEM_TOPIC_TEMPLATE = """
 You are tasked with analyzing a table of contents from a book. Your goal is to identify and extract the main topics and subtopics. Please provide a clear and organized list of these topics and subtopics. The list should reflect the structure and hierarchy presented in the table of contents.
 """
    - **How:** Structure the entries clearly and precisely as attributes of the class.
    - **Tip:** Use precise language to capture the relationship between the main topic and subtopic, ensuring clarity and ease of reference for future use.
 """
 SUMMARIZER_SYSTEM_TEMPLATE = """
 """

db/database.py CHANGED Viewed

@@ -13,9 +13,6 @@ load_dotenv()
 SQLALCHEMY_DATABASE_URL = MYSQL_CONFIG.DB_URI_SQL_ALCHEMY
-# Get the base64 encoded certificate from the environment variable
-ca_cert_base64 = os.getenv("CA_CERT_BASE64")
 # Retrieve the Base64-encoded CA certificate from the environment variable
 ca_cert_base64 = os.getenv("CA_CERT_BASE64")

 SQLALCHEMY_DATABASE_URL = MYSQL_CONFIG.DB_URI_SQL_ALCHEMY
 # Retrieve the Base64-encoded CA certificate from the environment variable
 ca_cert_base64 = os.getenv("CA_CERT_BASE64")

db/db.py DELETED Viewed

@@ -1,124 +0,0 @@
-# Experimental
-from sqlalchemy import Column, String, Enum, ForeignKey, DateTime
-from sqlalchemy.dialects.postgresql import UUID, ENUM, JSONB
-from sqlalchemy.orm import relationship
-from sqlalchemy.sql import func
-from enum import Enum
-from sqlalchemy.ext.declarative import as_declarative, declared_attr
-from llama_index.core.callbacks.schema import CBEventType
-# Model
-@as_declarative()
-class Base:
-    id = Column(UUID, primary_key=True, index=True, default=func.uuid_generate_v4())
-    created_at = Column(DateTime, server_default=func.now(), nullable=False)
-    updated_at = Column(
-        DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
-    )
-    __name__: str
-    # Generate __tablename__ automatically
-    @declared_attr
-    def __tablename__(cls) -> str:
-        return cls.__name__.lower()
-# DB
-class MessageRoleEnum(str, Enum):
-    user = "user"
-    assistant = "assistant"
-class MessageStatusEnum(str, Enum):
-    PENDING = "PENDING"
-    SUCCESS = "SUCCESS"
-    ERROR = "ERROR"
-class MessageSubProcessStatusEnum(str, Enum):
-    PENDING = "PENDING"
-    FINISHED = "FINISHED"
-# python doesn't allow enums to be extended, so we have to do this
-additional_message_subprocess_fields = {
-    "CONSTRUCTED_QUERY_ENGINE": "constructed_query_engine",
-    "SUB_QUESTIONS": "sub_questions",
-}
-MessageSubProcessSourceEnum = Enum(
-    "MessageSubProcessSourceEnum",
-    [(event_type.name, event_type.value) for event_type in CBEventType]
-    + list(additional_message_subprocess_fields.items()),
-)
-def to_pg_enum(enum_class) -> ENUM:
-    return ENUM(enum_class, name=enum_class.__name__)
-class Document(Base):
-    """
-    A document along with its metadata
-    """
-    # URL to the actual document (e.g. a PDF)
-    url = Column(String, nullable=False, unique=True)
-    metadata_map = Column(JSONB, nullable=True)
-    conversations = relationship("ConversationDocument", back_populates="document")
-class Conversation(Base):
-    """
-    A conversation with messages and linked documents
-    """
-    messages = relationship("Message", back_populates="conversation")
-    conversation_documents = relationship(
-        "ConversationDocument", back_populates="conversation"
-    )
-class ConversationDocument(Base):
-    """
-    A many-to-many relationship between a conversation and a document
-    """
-    conversation_id = Column(
-        UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
-    )
-    document_id = Column(UUID(as_uuid=True), ForeignKey("document.id"), index=True)
-    conversation = relationship("Conversation", back_populates="conversation_documents")
-    document = relationship("Document", back_populates="conversations")
-class Message(Base):
-    """
-    A message in a conversation
-    """
-    conversation_id = Column(
-        UUID(as_uuid=True), ForeignKey("conversation.id"), index=True
-    )
-    content = Column(String)
-    role = Column(to_pg_enum(MessageRoleEnum))
-    status = Column(to_pg_enum(MessageStatusEnum), default=MessageStatusEnum.PENDING)
-    conversation = relationship("Conversation", back_populates="messages")
-    sub_processes = relationship("MessageSubProcess", back_populates="message")
-class MessageSubProcess(Base):
-    """
-    A record of a sub-process that occurred as part of the generation of a message from an AI assistant
-    """
-    message_id = Column(UUID(as_uuid=True), ForeignKey("message.id"), index=True)
-    source = Column(to_pg_enum(MessageSubProcessSourceEnum))
-    message = relationship("Message", back_populates="sub_processes")
-    status = Column(
-        to_pg_enum(MessageSubProcessStatusEnum),
-        default=MessageSubProcessStatusEnum.FINISHED,
-        nullable=False,
-    )
-    metadata_map = Column(JSONB, nullable=True)

db/delete_data.py DELETED Viewed

@@ -1,22 +0,0 @@
-import logging
-from db.repository import Repository, get_db_conn
-# Setup logging (configure as needed)
-logging.basicConfig(level=logging.INFO)
-class DeleteDatabase(Repository):
-    async def delete_record(self, params):
-        if "id" not in params:
-            raise ValueError("The 'id' parameter is required.")
-        query = """
-            DELETE FROM metadata
-            WHERE id = :id
-        """
-        try:
-            await self._exec(query, params)
-            logging.info(f"Record with id {params['id']} deleted successfully.")
-        except Exception as e:
-            logging.error(f"Error deleting record with id {params['id']}: {e}")
-            raise

db/fetching.py CHANGED Viewed

@@ -13,7 +13,7 @@ class DataFetching:
         user_meta_entries = user_meta_query.get_user_meta_entries(self.db)
         if not user_meta_entries or user_meta_entries==[]:
-            return {"info": "No book collection found"}
         # Extract relevant data from the user_meta_entries
@@ -31,8 +31,6 @@ class DataFetching:
             for user_meta, metadata, category in user_meta_entries  # Unpack the tuple
         ]
-        print("Hasil akhir ", results)
         # Extract relevant data from the user_meta_entries
         return results
@@ -42,7 +40,6 @@ class DataFetching:
         return [
             MetadataResponse(
-                status="success",
                 id = id,
                 title=title,
                 author=author,

         user_meta_entries = user_meta_query.get_user_meta_entries(self.db)
         if not user_meta_entries or user_meta_entries==[]:
+            return []
         # Extract relevant data from the user_meta_entries
             for user_meta, metadata, category in user_meta_entries  # Unpack the tuple
         ]
         # Extract relevant data from the user_meta_entries
         return results
         return [
             MetadataResponse(
                 id = id,
                 title=title,
                 author=author,

db/get_data.py DELETED Viewed

@@ -1,69 +0,0 @@
-import logging
-from db.repository import Repository, get_db_conn
-from fastapi.responses import JSONResponse
-# Setup logging (configure as needed)
-logging.basicConfig(level=logging.INFO)
-class GetDatabase(Repository):
-    async def execute_query(self, query, params=None, fetch_one=False):
-        """
-        Helper function to execute SQL queries and handle exceptions.
-        """
-        try:
-            print(fetch_one)
-            if fetch_one:
-                results = await self._fetch_one(query, params)
-                print(results)
-            else:
-                results = await self.get_by_query(query, params)
-                print("result execute query : ", results)
-            return results if results else None
-        except Exception as e:
-            logging.error(f"An error occurred while executing query: {e}")
-            return JSONResponse(status_code=500, content=f"An error occurred while executing query: {e}")
-    async def get_data(self, title):
-        """
-        Fetch the first result matching the given title from the metadata table.
-        """
-        query = """
-        SELECT * FROM metadata
-        WHERE title = %s
-        limit 5;
-        """
-        try:
-            results = await self.execute_query(query, (title,), fetch_one=True)
-            return results
-        except Exception as e:
-            logging.error(f"An error occurred while get data: {e}")
-            return JSONResponse(status_code=500, content=f"An error occurred while get data: {e}")
-    async def get_all_data(self):
-        """
-        Fetch all data from the metadata table.
-        """
-        query = """
-        SELECT * FROM metadata
-        """
-        results = await self.execute_query(query)
-        print("result", results)
-        return results
-    async def get_data_by_id(self, id):
-        query = f"""
-        SELECT * FROM metadata WHERE id = :id
-        """
-        param = {"id" : id}
-        try:
-            results = await self.execute_query(query, param)
-            print('Query successful, results: %s', results)
-            return results[0] if results else None
-        except Exception as e:
-            print('Error fetching data by ID %s: %s', id, e)
-            return JSONResponse(status_code=500, content=f"An error while fething data: {e}")

db/query/base_query.py CHANGED Viewed

@@ -53,9 +53,8 @@ class BaseQuery:
         """Delete an entry by ID with optional filter conditions."""
         # Build the query to select the entry
         query = select(model)
-        if id :
             query = query.where(model.id == id)
         if filter_conditions:
             query = query.where(*filter_conditions)
@@ -65,9 +64,7 @@ class BaseQuery:
             return entry
         # Build the delete query
-        delete_query = delete(model)
-        if id :
-            delete_query = delete_query.where(model.id == id)
         if filter_conditions:
             delete_query = delete_query.where(*filter_conditions)

         """Delete an entry by ID with optional filter conditions."""
         # Build the query to select the entry
         query = select(model)
+        if id:
             query = query.where(model.id == id)
         if filter_conditions:
             query = query.where(*filter_conditions)
             return entry
         # Build the delete query
+        delete_query = delete(model).where(model.id == id)
         if filter_conditions:
             delete_query = delete_query.where(*filter_conditions)

db/query/query_book.py CHANGED Viewed

@@ -41,7 +41,6 @@ class BookQuery(BaseQuery):
             join_conditions=join_conditions,
             multiple=True,
         )
-        print("result", result)
         return result

             join_conditions=join_conditions,
             multiple=True,
         )
         return result

db/query/query_user_meta.py CHANGED Viewed

@@ -6,7 +6,7 @@ from db.query.base_query import BaseQuery
 class UserMetaQuery(BaseQuery):
     def __init__(self, user):
         super().__init__(user)
     def get_user_meta_entries(self, db):
         """Fetch all user meta entries joined with metadata and category."""
         join_models = [Metadata, Category]
@@ -14,7 +14,7 @@ class UserMetaQuery(BaseQuery):
             User_Meta.metadata_id == Metadata.id,
             Metadata.category_id == Category.id,
         ]
         filter_conditions = [User_Meta.user_id == self.user_id]
         result = self.get_with_joins(
@@ -23,10 +23,10 @@ class UserMetaQuery(BaseQuery):
             join_models=join_models,
             join_conditions=join_conditions,
             filter_conditions=filter_conditions,
-            multiple=True,
         )
         return result
     def insert_user_meta_entries(self, db, metadata_ids):
         """Insert new user meta entries if they don't already exist."""
         # Fetch existing metadata IDs for the user
@@ -58,14 +58,13 @@ class UserMetaQuery(BaseQuery):
             "metadata_ids": new_metadata_ids,  # Include only new metadata IDs in the result
         }
     def update_user_meta_entries(self, db, metadata_ids):
         """Update user meta entries: keep, delete, or add new entries based on metadata_ids."""
         filter_conditions = [User_Meta.user_id == self.user_id]
         # Fetch existing user meta entries
-        existing_user_meta = self.get(
-            db, model=User_Meta, filter_conditions=filter_conditions, multiple=True
-        )
         existing_user_meta = [user_meta[0] for user_meta in existing_user_meta]
         existing_meta_ids = [entry.metadata_id for entry in existing_user_meta]
@@ -80,10 +79,7 @@ class UserMetaQuery(BaseQuery):
         # Delete entries that are no longer in the updated metadata_ids list
         if metadata_to_delete:
-            db.query(User_Meta).filter(
-                User_Meta.user_id == self.user_id,
-                User_Meta.metadata_id.in_(metadata_to_delete),
-            ).delete(synchronize_session=False)
         # Add new entries for metadata that are not in the existing user meta
         for meta_id in metadata_to_add:
@@ -91,7 +87,7 @@ class UserMetaQuery(BaseQuery):
             self.add(db, new_entry)
         db.commit()
         return {
             "status": "success",
             "added_meta": list(metadata_to_add),
@@ -101,15 +97,10 @@ class UserMetaQuery(BaseQuery):
     def delete_user_meta(self, db, metadata_id):
         """Delete user meta entries by metadata_id."""
-        filter_conditions = [
-            User_Meta.metadata_id == metadata_id,
-            User_Meta.user_id == self.user_id,
-        ]
         self.delete(db, model=User_Meta, filter_conditions=filter_conditions)
-        return {
-            "status": "success",
-            "message": f"Book user with id {metadata_id} deleted successfully.",
-        }
     def delete_all_user_meta(self, db):
         """Delete all user meta entries for a user."""

 class UserMetaQuery(BaseQuery):
     def __init__(self, user):
         super().__init__(user)
     def get_user_meta_entries(self, db):
         """Fetch all user meta entries joined with metadata and category."""
         join_models = [Metadata, Category]
             User_Meta.metadata_id == Metadata.id,
             Metadata.category_id == Category.id,
         ]
         filter_conditions = [User_Meta.user_id == self.user_id]
         result = self.get_with_joins(
             join_models=join_models,
             join_conditions=join_conditions,
             filter_conditions=filter_conditions,
+            multiple=True
         )
         return result
     def insert_user_meta_entries(self, db, metadata_ids):
         """Insert new user meta entries if they don't already exist."""
         # Fetch existing metadata IDs for the user
             "metadata_ids": new_metadata_ids,  # Include only new metadata IDs in the result
         }
     def update_user_meta_entries(self, db, metadata_ids):
         """Update user meta entries: keep, delete, or add new entries based on metadata_ids."""
         filter_conditions = [User_Meta.user_id == self.user_id]
         # Fetch existing user meta entries
+        existing_user_meta = self.get(db, model=User_Meta, filter_conditions=filter_conditions, multiple=True)
         existing_user_meta = [user_meta[0] for user_meta in existing_user_meta]
         existing_meta_ids = [entry.metadata_id for entry in existing_user_meta]
         # Delete entries that are no longer in the updated metadata_ids list
         if metadata_to_delete:
+            db.query(User_Meta).filter(User_Meta.user_id == self.user_id, User_Meta.metadata_id.in_(metadata_to_delete)).delete(synchronize_session=False)
         # Add new entries for metadata that are not in the existing user meta
         for meta_id in metadata_to_add:
             self.add(db, new_entry)
         db.commit()
         return {
             "status": "success",
             "added_meta": list(metadata_to_add),
     def delete_user_meta(self, db, metadata_id):
         """Delete user meta entries by metadata_id."""
+        filter_conditions = [User_Meta.metadata_id==metadata_id,
+                             User_Meta.user_id==self.user_id]
         self.delete(db, model=User_Meta, filter_conditions=filter_conditions)
+        return {"message": f"Book user with id {metadata_id} deleted successfully."}
     def delete_all_user_meta(self, db):
         """Delete all user meta entries for a user."""

db/save_data.py DELETED Viewed

@@ -1,39 +0,0 @@
-from databases import Database
-import logging
-from dotenv import load_dotenv
-from db.repository import Repository
-load_dotenv()
-class InsertDatabase(Repository):
-    # Example function to insert data asynchronously
-    async def insert_data(self, params, category_id):
-        # SQL insert query with named placeholders
-        query = """
-        INSERT INTO metadata (title, category_id, author, year, publisher)
-        VALUES (:title, :category_id, :author, :year, :publisher)
-        """
-        reference = {
-            "title": params["title"],
-            "category_id": category_id,  # directly assign category_id
-            "author": params["author"],
-            "year": params["year"],
-            "publisher": params["publisher"]
-        }
-        print(reference)
-        try:
-            # Execute the query with the provided values
-            await self._exec(query, reference)
-            logging.info(
-                f"Data inserted successfully: {reference['title']}, {reference['author']}"
-            )
-        except Exception as e:
-            # Log any errors that occur during the database insert operation
-            logging.error(f"Failed to insert data: {e}")
-            raise  # Re-raise the exception to allow further handling if needed

db/update_data.py DELETED Viewed

@@ -1,35 +0,0 @@
-import logging
-from db.repository import Repository, get_db_conn
-# Setup logging (configure as needed)
-logging.basicConfig(level=logging.INFO)
-class UpdateDatabase(Repository):
-    async def update_record(self, reference):
-        print("update record", reference)
-        if "id" not in reference:
-            raise ValueError("The 'id' parameter is required.")
-        query = """
-            UPDATE metadata
-            SET title = :title,
-                category_id = :category_id,
-                author = :author,
-                year = :year,
-                publisher = :publisher
-            WHERE id = :id
-        """
-        print(query)
-        print(reference)
-        try:
-            await self._exec(query, reference)
-            logging.info(
-                f"Record with id {reference['id']} updated successfully."
-            )
-        except Exception as e:
-            logging.error(
-                f"Error updating  record with id {reference['id']}: {e}"
-            )
-            raise

helper/bot_function.py DELETED Viewed

File without changes

helper/db_function.py DELETED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -53,12 +53,6 @@ joblib==1.4.2
 jose==1.0.0
 jsonpatch==1.33
 jsonpointer==3.0.0
-kubernetes==30.1.0
-langchain==0.3.0
-langchain-community==0.3.0
-langchain-core==0.3.1
-langchain-openai==0.2.0
-langchain-text-splitters==0.3.0
 langchainhub==0.1.21
 langfuse==2.48.1
 langsmith==0.1.123

 jose==1.0.0
 jsonpatch==1.33
 jsonpointer==3.0.0
 langchainhub==0.1.21
 langfuse==2.48.1
 langsmith==0.1.123

research/delete.ipynb CHANGED Viewed

@@ -33,7 +33,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -54,17 +54,19 @@
     "\n",
     "load_dotenv()\n",
     "\n",
-    "api_key = os.getenv(\"PINECONE_API_KEY\")\n",
-    "\n",
     "pc = Pinecone(api_key=api_key)\n",
-    "index = pc.Index(\"summarizer-semantic-index\")\n",
     "\n",
-    "random_vector = [random.uniform(0, 1) for _ in range(1536)]\n",
     "results = index.query(\n",
     "    vector=random_vector,\n",
     "    top_k=10000,\n",
     "    filter={\n",
-    "        \"title\": {\"$eq\": \"test\"},\n",
     "    },\n",
     ")\n",
     "\n",

   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
     "\n",
     "load_dotenv()\n",
     "\n",
+    "# api_key = os.getenv(\"PINECONE_API_KEY\")\n",
+    "api_key = \"pcsk_aZM8H_P9cK1nfUghBNJfiAhvRM6zgfgiBsHhtJDwydZaXZp47pKSQBFP6J7rmVPwqDYHW\"\n",
     "pc = Pinecone(api_key=api_key)\n",
+    "# index = pc.Index(\"summarizer-semantic-index\")\n",
+    "index = pc.Index(\"multimedika\")\n",
     "\n",
+    "# random_vector = [random.uniform(0, 1) for _ in range(1536)]\n",
+    "random_vector = [random.uniform(0, 1) for _ in range(768)]\n",
     "results = index.query(\n",
     "    vector=random_vector,\n",
     "    top_k=10000,\n",
     "    filter={\n",
+    "        \"url\": {\"$eq\": \"test\"},\n",
     "    },\n",
     ")\n",
     "\n",

research/llama_parse.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff