Commit 69beac6 · 1 parent: b39c0ba
fix : change parsing

Files changed:
- api/function.py +5 -5
- api/router/book.py +4 -4
- api/router/category.py +19 -22
- api/router/testing.py +82 -0
- core/chat/bot_service.py +2 -2
- core/parser.py +23 -0
- db/query/base_query.py +15 -8
- db/query/query_book.py +6 -10
- db/query/query_bot.py +7 -1
- db/query/query_category.py +23 -7
- db/query/query_user_meta.py +0 -18
- db/repository.py +0 -36
- script/document_uploader.py +25 -22
- service/reader_v3.py +86 -0
- utils/error_handlers.py +3 -3
api/function.py CHANGED
@@ -29,12 +29,12 @@ async def data_ingestion(reference, file: UploadFile) -> Any:
         user_id="admin_book_uploaded",
     )
 
-    # Upload to AWS
-    file_name = f"{reference['title']}"
-    aws_loader = Loader()
+    # # Upload to AWS
+    # file_name = f"{reference['title']}"
+    # aws_loader = Loader()
 
-    file_obj = file
-    aws_loader.upload_to_s3(file_obj, file_name)
+    # file_obj = file
+    # aws_loader.upload_to_s3(file_obj, file_name)
 
     uploader = Uploader(reference, file)
 
api/router/book.py CHANGED
@@ -64,10 +64,6 @@ async def upload_file(
     if auth_response:
         return auth_response
 
-    # Create a new Metadata object
-    book_query = BookQuery(user)
-    book_query.add_book(db, title, author, category_id, year, publisher)
-
    logging.info("Database Inserted")
 
    # Query the category based on category_id
@@ -85,6 +81,10 @@ async def upload_file(
 
    # Process the file and handle data ingestion
    response = await data_ingestion(reference, file)
+
+    # Create a new Metadata object
+    # book_query = BookQuery(user)
+    # book_query.add_book(db, title, author, category_id, year, publisher)
 
    return {
        "filename": file.filename,
api/router/category.py CHANGED
@@ -4,6 +4,7 @@ from fastapi import APIRouter, Depends
 
 from db.models import Category
 from db.database import get_db
+from db.query.query_category import CategoryQuery
 from service.dto import CategoryCreate
 
 from script.vector_db import IndexManager
@@ -19,13 +20,19 @@ db_dependency = Annotated[Session, Depends(get_db)]
 
 
 @router.get("/category")
-async def get_all_categories(user: user_dependency, db: db_dependency):
+async def get_all_categories_router(user: user_dependency, db: db_dependency):
    if user is None or user.get("role_id") != 1:
        return JSONResponse(status_code=401, content="Authentication Failed")
 
    try:
        # Logic to retrieve all categories
-        categories = db.query(Category).all()
+        category_query = CategoryQuery(user)
+        categories = category_query.get_all_categories(db)
+
+        print(categories)
+
+
+        # categories = db.query(Category).all()
        if not categories:
            return JSONResponse(status_code=404, content="No categories found")
 
@@ -43,7 +50,7 @@ async def get_all_categories(user: user_dependency, db: db_dependency):
 
 
 @router.get("/category/{category_id}")
-async def get_categories_by_ids(
+async def get_category_by_id_router(
    user: user_dependency,
    db: db_dependency,
    category_id: int,
@@ -53,7 +60,8 @@ async def get_categories_by_ids(
 
    try:
        # Fetch categories based on the list of provided category_ids
-        …
+        category_query = CategoryQuery(user)
+        category = category_query.get_category_by_id(db, category_id)
 
        if category is None:
            return JSONResponse(status_code=404, content="No categories found for the given IDs")
@@ -75,21 +83,17 @@ async def create_category(user: user_dependency, db: db_dependency, category: Ca
 
    try:
        # Check if category already exists
-        existing_category = (
-            …
-        )
-        if existing_category:
+        category_query = CategoryQuery(user)
+        existing_category = category_query.get_existing_category(db, category.category_name)
+        if not isinstance(existing_category,JSONResponse):
            return JSONResponse(status_code=400, content="Category already exists")
-
-        # Create a new category
-        new_category = Category(category=category.category_name)
-        db.add(new_category)
-        db.commit()
-        db.refresh(new_category)
+
+        # Add category
+        category_query.add_category(db, category.category_name)
+        print("category added")
 
        return {
            "message": "Category created successfully",
-            "category_id": new_category.id,
        }
 
    except IntegrityError:
@@ -99,13 +103,6 @@ async def create_category(user: user_dependency, db: db_dependency, category: Ca
            content="Database integrity error: possibly a duplicate entry.",
        )
 
-    except SQLAlchemyError as e:
-        db.rollback()
-        return JSONResponse(
-            status_code=500, content="Database error occurred: " + str(e)
-        )
-
-
 @router.put("/category/{category_id}")
 async def update_category(
    user: user_dependency, db: db_dependency, category_id: int, category: CategoryCreate
api/router/testing.py ADDED
@@ -0,0 +1,82 @@
+from fastapi import FastAPI, HTTPException, Depends, Form
+from fastapi.security import OAuth2PasswordBearer
+import httpx
+import os
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+app = FastAPI()
+
+# Bearer token for API authentication
+BEARER_TOKEN = os.getenv("MEDUCINE_API_BEARER_TOKEN")
+
+# Base URL for the Meducine API
+BASE_URL = os.getenv("BASE_URL")
+
+# OAuth2PasswordBearer provides the token as a dependency
+oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/login")
+
+@app.post("/login")
+async def login(email: str = Form(...), password: str = Form(...)):
+    async with httpx.AsyncClient() as client:
+        try:
+            response = await client.post(
+                f"{BASE_URL}/actions/meducine-restapi/auth/login",
+                data={"email": email, "password": password},
+                headers={"Authorization": f"Bearer {BEARER_TOKEN}"}
+            )
+            response.raise_for_status()  # Raise an error for bad responses (4xx or 5xx)
+            return handle_response(response)  # Assuming this function formats the response correctly
+        except httpx.HTTPStatusError as e:
+            raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/actions/meducine-restapi/auth/logout")
+async def logout(email: str = Form(...), password: str = Form(...)):
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            f"{BASE_URL}/actions/meducine-restapi/auth/logout",
+            data={"email": email, "password": password},
+            headers={"Authorization": f"Bearer {BEARER_TOKEN}"}
+        )
+        return handle_response(response)
+
+@app.get("/actions/meducine-restapi/auth/identity")
+async def get_identity(token: str = Depends(oauth2_scheme)):
+    async with httpx.AsyncClient() as client:
+        response = await client.get(
+            f"{BASE_URL}/actions/meducine-restapi/auth/identity",
+            headers={"Authorization": f"Bearer {token}"}
+        )
+        return handle_response(response)
+
+@app.get("/actions/meducine-restapi/user/has-premium-access")
+async def check_premium_access(feature: str, token: str = Depends(oauth2_scheme)):
+    async with httpx.AsyncClient() as client:
+        response = await client.get(
+            f"{BASE_URL}/actions/meducine-restapi/user/has-premium-access",
+            params={"feature": feature},
+            headers={"Authorization": f"Bearer {token}"}
+        )
+        return handle_response(response)
+
+def handle_response(response: httpx.Response):
+    """
+    Handles the response from the Meducine API, returning appropriate responses based on status codes.
+    """
+    if response.status_code in range(200, 300):
+        return response.json()  # Successful request
+    elif response.status_code in range(400, 500):
+        raise HTTPException(status_code=response.status_code, detail=response.json())  # Client error
+    elif response.status_code in range(500, 600):
+        raise HTTPException(status_code=response.status_code, detail="Server error")  # Server error
+    else:
+        raise HTTPException(status_code=500, detail="Unexpected error")
+
+# Run the application
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="127.0.0.1", port=8000)
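For a quick smoke test, the new app can be exercised with httpx once it is running locally; the credentials and token below are placeholders, and BASE_URL plus MEDUCINE_API_BEARER_TOKEN must be set in .env for the upstream calls to succeed:

import httpx

# Hypothetical credentials for a local test of api/router/testing.py
resp = httpx.post(
    "http://127.0.0.1:8000/login",
    data={"email": "user@example.com", "password": "secret"},
)
print(resp.status_code, resp.json())

# The protected identity endpoint expects the OAuth2 bearer token
token = "token-from-the-upstream-api"  # placeholder
resp = httpx.get(
    "http://127.0.0.1:8000/actions/meducine-restapi/auth/identity",
    headers={"Authorization": f"Bearer {token}"},
)
print(resp.status_code)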
core/chat/bot_service.py CHANGED
@@ -10,7 +10,7 @@ from llama_index.core.llms import MessageRole
 
 from core.chat.engine import Engine
 from core.chat.chatstore import ChatStore
-from core.parser import clean_text, update_response, renumber_sources
+from core.parser import clean_text, update_response, sort_and_renumber_sources
 
 from service.dto import ChatMessage
 from pymongo.mongo_client import MongoClient
@@ -47,7 +47,7 @@ class ChatCompletionService:
 
        # Update response and renumber sources
        response = update_response(str(response))
-        contents = renumber_sources(contents)
+        contents = sort_and_renumber_sources(contents)
 
        # Add contents to metadata
        metadata_collection = self._attach_contents_to_metadata(contents, metadata_collection)
core/parser.py CHANGED
@@ -75,6 +75,29 @@ def renumber_sources(source_list):
     return new_sources
 
 
+def sort_and_renumber_sources(source_list):
+    """
+    This function takes a list of sources, sorts them based on the source number,
+    and renumbers them sequentially starting from 1.
+
+    :param source_list: List of strings containing source information.
+    :return: Sorted and renumbered list of sources.
+    """
+
+    # Function to extract source number
+    def extract_source_number(source):
+        match = re.search(r"Source (\d+)", source)
+        return int(match.group(1)) if match else float('inf')
+
+    # Sort sources based on the source number
+    sorted_sources = sorted(source_list, key=extract_source_number)
+
+    # Reassign the numbering in the sorted sources
+    for idx, source in enumerate(sorted_sources, 1):
+        sorted_sources[idx-1] = re.sub(r"Source \d+", f"Source {idx}", source)
+
+    return sorted_sources
+
 def seperate_to_list(text):
    # Step 1: Split the text by line breaks (\n)
    lines = text.split("\n")
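A quick illustration of the new helper's behavior; the sample strings are made up, and re is assumed to be imported in core/parser.py already, which the existing renumber_sources implies:

from core.parser import sort_and_renumber_sources

# Hypothetical out-of-order source lines
contents = [
    "Source 3: page 12 of Book A",
    "Source 1: page 4 of Book B",
]

print(sort_and_renumber_sources(contents))
# ['Source 1: page 4 of Book B', 'Source 2: page 12 of Book A']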
db/query/base_query.py CHANGED
@@ -81,21 +81,30 @@ class BaseQuery:
 
     def update(self, db, model, id, update_data, filter_conditions=None):
         """Update an entry by ID."""
+        # Define the initial query to fetch the entry
         query = select(model).where(model.id == id)
+
+        # Append additional filter conditions if provided
         if filter_conditions:
-            query = query.where(*filter_conditions)
+            query = query.where(model.id == id, *filter_conditions)
 
+        # Attempt to fetch the entry
        not_found_message = f"Entry with ID {id} not found."
        entry = self._fetch(db, query, not_found_message, multiple=False)
 
+        # Check if the entry was found
        if isinstance(entry, JSONResponse):
            return entry
 
-        …
+        # Prepare the update statement
+        stmt = update(model).where(model.id == id).values(update_data)
+        db.execute(stmt)
+
+        # If filter conditions were provided, apply them to the update as well
+        if filter_conditions:
+            filter_stmt = update(model).where(model.id == id, *filter_conditions).values(update_data)
+            db.execute(filter_stmt)
+
        return self._handle_commit(db)
 
    def update_entries(self, db, model, update_data, filter_conditions=None):
@@ -128,8 +137,6 @@ class BaseQuery:
        else:
            query = select(model)
 
-        # Apply filtering by user ID and optional conditions
-        query = query
        if id:
            query = query.where(model.id == id)
        if filter_conditions:
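The rewritten update goes through SQLAlchemy's statement-style update() rather than mutating a fetched entry; note that in the committed version both stmt and filter_stmt are executed when filter_conditions is given, so the unfiltered update runs first. A minimal, self-contained sketch of the statement pattern itself, with a hypothetical User model and an in-memory engine:

from sqlalchemy import create_engine, update, select, Integer, String
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, Session

class Base(DeclarativeBase):
    pass

class User(Base):
    __tablename__ = "users"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String(50))

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as db:
    db.add(User(id=1, name="old"))
    db.commit()

    # Equivalent of: update(model).where(model.id == id).values(update_data)
    db.execute(update(User).where(User.id == 1).values({"name": "new"}))
    db.commit()

    print(db.execute(select(User.name).where(User.id == 1)).scalar_one())  # -> "new"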
db/query/query_book.py CHANGED
@@ -81,7 +81,12 @@ class BookQuery(BaseQuery):
 
     def update_book(self, db, book_id, title, author):
         update_data = {"title": title, "author": author}
-        self.update(db, Metadata, book_id, update_data)
+        self.update(
+            db,
+            model=Metadata,
+            id=book_id,
+            update_data=update_data,
+        )
 
    def delete_book(self, db, book_id):
        self.delete(db, Metadata, book_id)
@@ -92,15 +97,6 @@ class BookQuery(BaseQuery):
    def get_metadata_books(self, db, metadata_id):
        return self.get(db, Metadata, id=metadata_id)
 
-    # def get_title_from_session(self, db, metadata_id, session_id):
-    #     model = Session_Publisher
-    #     columns = [Metadata.title]
-    #     join_models = [Session_Publisher.id == session_id, Metadata.id == metadata_id]
-
-    #     titles = self.get_all_with_join_columns(db, model, columns, join_models)
-
-    #     return titles
-
    def get_title_from_session(self, db, metadata_id, session_id):
        model = Session_Publisher
        columns = [Metadata.title]
db/query/query_bot.py CHANGED
@@ -39,7 +39,13 @@ class BotQuery(BaseQuery):
             Session_Publisher.metadata_id == metadata_id,
         ]
 
-        sessions = self.get_columns(db, model=model, columns=columns, filter_conditions=filter_conditions, multiple=True)
+        sessions = self.get_columns(
+            db,
+            model=model,
+            columns=columns,
+            filter_conditions=filter_conditions,
+            multiple=True,
+        )
 
        return sessions
 
db/query/query_category.py CHANGED
@@ -1,17 +1,16 @@
 from db.models import Category, Metadata
 from db.query.base_query import BaseQuery
-from sqlalchemy import select, delete, update
 
 class CategoryQuery(BaseQuery):
     def __init__(self, user):
         super().__init__(user)
 
-    def add_category(self, db,
-        new_category = Category(
+    def add_category(self, db, category_name):
+        new_category = Category(category=category_name)
        return self.add(db, new_category)
 
-    def update_category(self, db, category_id,
-        update_data = {"
+    def update_category(self, db, category_id, category_name):
+        update_data = {"category": category_name}
        self.update(db, Category, category_id, update_data)
 
    def delete_category(self, db, category_id):
@@ -20,7 +19,22 @@ class CategoryQuery(BaseQuery):
    def get_category(self, db, category_id):
        columns = [Category.category]
        model = Category
-        …
+        results = self.get_columns(db,model=model, columns=columns, id=category_id)
+        return results
+
+    def get_existing_category(self, db, category_name):
+        columns = [Category.category]
+        model = Category
+        filter_condition = [Category.category == category_name]
+        results = self.get_columns(db, columns=columns, model=model, filter_conditions=filter_condition)
+        return results
+
+    def get_category_by_id(self, db, category_id):
+        model = Category
+        filter_conditions = [Category.id == category_id]
+        results = self.get(db, model=model, filter_conditions=filter_conditions)
+
+        return results
 
    def get_current_category(self, db, metadata_id):
        columns = [Category.category]
@@ -38,4 +52,6 @@ class CategoryQuery(BaseQuery):
        return result
 
    def get_all_categories(self, db):
-        …
+        results = self.get(db, Category, multiple=True)
+        categories = [category[0] for category in results]
+        return categories
db/query/query_user_meta.py CHANGED
@@ -6,24 +6,6 @@ from db.query.base_query import BaseQuery
 class UserMetaQuery(BaseQuery):
     def __init__(self, user):
         super().__init__(user)
-
-    # def get_user_meta_entries(self, db):
-    #     """Fetch all user meta entries joined with metadata and category."""
-    #     join_models = [Metadata, Category]
-    #     print(join_models)
-    #     join_conditions = [
-    #         User_Meta.metadata_id == Metadata.id,
-    #         Metadata.category_id == Category.id,
-    #     ]
-    #     print(join_conditions)
-
-    #     result = self.get_all_with_joins(
-    #         db,
-    #         model=User_Meta,
-    #         join_models=join_models,
-    #         join_conditions=join_conditions,
-    #     )
-    #     return result
 
    def get_user_meta_entries(self, db):
        """Fetch all user meta entries joined with metadata and category."""
db/repository.py DELETED
@@ -1,36 +0,0 @@
-from databases import Database
-import datetime
-
-
-def get_db_conn(config):
-    db_url = f"{config.DB_URI}"
-    return Database(db_url)
-
-
-class Repository:
-    def __init__(self, db_conn):
-        self.db_conn = db_conn
-
-    async def get_by_query(self, query, param):
-        results = await self.db_conn.fetch_all(query, param)
-        print("result get _by query", results)
-        return [dict(result) for result in results]
-
-    async def _fetch_one(self, query, param):
-        result = await self.db_conn.fetch_one(query, param)
-        return dict(result) if result is not None else result
-
-    async def _exec(self, query, param):
-        return await self.db_conn.execute(query, param)
-
-    async def _exec_many(self, query, params):
-        return await self.db_conn.execute_many(query, params)
-
-    def update_params(self, params, update=False):
-        current_time = datetime.datetime.now()
-        if update == False:
-
-            params.update({"createdAt": current_time, "updatedAt": current_time})
-        else:
-            params.update({"updatedAt": current_time})
-        return params
script/document_uploader.py CHANGED
@@ -2,15 +2,16 @@ from llama_index.core.ingestion import IngestionPipeline
 from llama_index.embeddings.openai import OpenAIEmbedding
 from config import PINECONE_CONFIG
 from pinecone.grpc import PineconeGRPC as Pinecone
-from service.reader import Reader
+# from service.reader import Reader
 from script.get_metadata import Metadata
-from fastapi import UploadFile,status
+from fastapi import UploadFile, status
 from fastapi.responses import JSONResponse
 
 from llama_index.core.node_parser import (
     SentenceSplitter,
     SemanticSplitterNodeParser,
 )
+from service.reader_v3 import upload_file
 
 # from script.get_topic import extract_topic
 
@@ -23,18 +24,18 @@ class Uploader:
    def __init__(self, reference, file: UploadFile):
        self.file = file
        # self.content_table = content_table
-        self.reader = Reader()
+        # self.reader = Reader()
        self.reference = reference
        self.metadata = Metadata(reference)
 
-    async def ingest_documents(self, file: UploadFile):
-        """Load documents from the storage path."""
-        documents = await self.reader.read_from_uploadfile(file)
-        print("Banyak document : ", len(documents))
-        print("document successfully ingested")
-
-        return documents
+    # async def ingest_documents(self, file: UploadFile):
+    #     """Load documents from the storage path."""
+    #     documents = await self.reader.read_from_uploadfile(file)
+    #     print("Banyak document : ", len(documents))
+    #     print("document successfully ingested")
+
+    #     return documents
 
    def check_existing_metadata(self, pinecone_index, title, random_vector):
        try:
            result = pinecone_index.query(
@@ -53,17 +54,17 @@ class Uploader:
 
    async def process_documents(self):
        # Ingest documents
-        documents = await self.ingest_documents(self.file)
-
-        # topic_extractor = extract_topic(self.reference, self.content_table)
-
-        embed_model = OpenAIEmbedding()
+        # documents = await self.ingest_documents(self.file)
 
        # Get metadata
-        documents_with_metadata = self.metadata.apply_metadata(documents)
-
+        # documents_with_metadata = self.metadata.apply_metadata(documents)
+        documents_with_metadata = await upload_file(self.reference, self.file)
+
+        # Get Topic
+        # topic_extractor = extract_topic(self.reference, self.content_table)
        # document_filtered = self.filter_document(documents_with_metadata)
 
+        embed_model = OpenAIEmbedding()
        # Set up the ingestion pipeline
        pipeline = IngestionPipeline(
            transformations=[
@@ -75,7 +76,7 @@ class Uploader:
                # topic_extractor,
            ]
        )
-
+
        # splitter = SemanticSplitterNodeParser(
        #     buffer_size=1, breakpoint_percentile_threshold=95, embed_model=embed_model
        # )
@@ -85,12 +86,14 @@ class Uploader:
            nodes_with_metadata = pipeline.run(documents=documents_with_metadata)
            # nodes_with_metadata = splitter.get_nodes_from_documents(documents_with_metadata)
            return nodes_with_metadata
-
+
        except Exception as e:
            try:
                # If the first method fails, fallback to sentence splitter
                sentence_splitter = SentenceSplitter(chunk_size=512)
-                nodes_with_metadata = sentence_splitter.get_nodes_from_documents(documents_with_metadata)
+                nodes_with_metadata = sentence_splitter.get_nodes_from_documents(
+                    documents_with_metadata
+                )
                print("Pipeline processing completed with SentenceSplitter fallback.")
                return nodes_with_metadata
            except Exception as fallback_error:
@@ -100,7 +103,7 @@ class Uploader:
                    status_code=500,
                    content="An internal server error occurred during pipeline processing.",
                )
-
+
    def filter_document(self, documents):
        api_key = PINECONE_CONFIG.PINECONE_API_KEY
        client = Pinecone(api_key=api_key)
@@ -117,4 +120,4 @@ class Uploader:
            if len(result) == 0:
                filtered_documents.append(doc)
 
-        return filtered_documents
+        return filtered_documents
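A sketch of the ingestion flow after this commit; the reference dict and the caller are illustrative, since the actual values come from the upload route:

from script.document_uploader import Uploader

async def ingest(reference: dict, upload_file_obj):
    uploader = Uploader(reference, upload_file_obj)
    # process_documents() now parses via service.reader_v3.upload_file
    # (LlamaParse) instead of the old Reader, then chunks via the pipeline
    nodes = await uploader.process_documents()
    return nodes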
service/reader_v3.py ADDED
@@ -0,0 +1,86 @@
+import os
+import nest_asyncio
+
+from llama_parse import LlamaParse
+from llama_index.core.node_parser import SimpleNodeParser
+from dotenv import load_dotenv
+from fastapi import UploadFile, File
+from fastapi.responses import JSONResponse
+import fitz
+
+from script.get_metadata import Metadata
+
+load_dotenv()
+nest_asyncio.apply()
+
+
+def parse_journal(content: bytes, file_name: str):
+    """Parse the journal using LlamaParse."""
+    try:
+        # Initialize the parser
+        parser = LlamaParse(
+            api_key=os.getenv("LLAMA_PARSE_API_KEY"),
+            result_type="markdown",
+            use_vendor_multimodal_model=True,
+            vendor_multimodal_model_name="openai-gpt-4o-mini",
+        )
+
+        # Load and process the document
+        llama_parse_documents = parser.load_data(
+            content, extra_info={"file_name": file_name}
+        )
+
+        return llama_parse_documents
+
+    except Exception as e:
+        return JSONResponse(status_code=400, content=f"Error processing file: {e}")
+
+
+async def extract_metadata(content: bytes):
+    """Extract metadata from the PDF content."""
+    try:
+        # Open the binary content with PyMuPDF
+        pdf_document = fitz.open("pdf", content)  # "pdf" specifies the format
+
+        # Extract metadata
+        metadata = pdf_document.metadata
+
+        # Prepare metadata dictionary with default values for missing fields
+        metadata_dict = {
+            "title": metadata.get("title", "N/A"),
+            "author": metadata.get("author", "N/A"),
+            "subject": metadata.get("subject", "N/A"),
+            "keywords": metadata.get("keywords", "N/A"),
+            "creation_date": metadata.get("created", "N/A"),
+            "modification_date": metadata.get("modified", "N/A"),
+        }
+
+        return metadata_dict
+
+    except Exception as e:
+        return JSONResponse(status_code=500, content=f"Error inputting metadata: {e}")
+
+
+async def upload_file(reference, file: UploadFile):
+    try:
+        # Read the binary content of the uploaded file once
+        content = await file.read()
+        # Parse the journal
+        parsed_documents = parse_journal(content, file.filename)
+        # Extract metadata
+        # metadata_dict = await extract_metadata(content)
+        # print("Metadata Dictionary : \n\n", metadata_dict)
+
+        metadata_gen = Metadata(reference)
+        documents_with_metadata = metadata_gen.apply_metadata(parsed_documents)
+
+        # document_with_metadata =
+
+        print("Document with Metadata : \n\n", documents_with_metadata)
+        print("Banyak documents : \n", len(documents_with_metadata))
+
+        # Return both parsed documents and metadata
+        return documents_with_metadata
+
+    except Exception as e:
+        return JSONResponse(status_code=500, content=f"Error processing file: {e}")
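A minimal driver for the new module; the PDF path is hypothetical, extract_metadata works offline via PyMuPDF, and the parse_journal call requires a valid LLAMA_PARSE_API_KEY in .env:

import asyncio
from service.reader_v3 import parse_journal, extract_metadata

with open("sample.pdf", "rb") as f:  # hypothetical input file
    content = f.read()

# Offline PDF metadata extraction via PyMuPDF
print(asyncio.run(extract_metadata(content)))

# Remote parse via LlamaParse (needs LLAMA_PARSE_API_KEY)
docs = parse_journal(content, "sample.pdf")
print(len(docs))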
utils/error_handlers.py CHANGED
@@ -9,12 +9,12 @@ def handle_exception(e: Exception):
 
 
 def handle_error(e, message):
-    return JSONResponse(status_code=500, content={"error": f"{message}: {str(e)}"})
+    return JSONResponse(status_code=500, content={"error occurs": f"{message}: {str(e)}"})
 
 
 def not_found_error(message):
-    return JSONResponse(status_code=404, content={"
+    return JSONResponse(status_code=404, content={"not found message": message})
 
 
 def no_entries_found(message):
-    return JSONResponse(status_code=404, content={"
+    return JSONResponse(status_code=404, content={"no entries found": message})