|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import re |
|
import warnings |
|
|
|
from flask import request |
|
from flask_login import login_required, current_user |
|
|
|
from api.db import FileType, ParserType |
|
from api.db.services import duplicate_name |
|
from api.db.services.document_service import DocumentService |
|
from api.db.services.file2document_service import File2DocumentService |
|
from api.db.services.file_service import FileService |
|
from api.db.services.knowledgebase_service import KnowledgebaseService |
|
from api.settings import RetCode |
|
from api.utils import get_uuid |
|
from api.utils.api_utils import construct_json_result |
|
from api.utils.file_utils import filename_type, thumbnail |
|
from rag.utils.minio_conn import MINIO |
|
from api.db.db_models import Task, File |
|
from api.db import FileType, TaskStatus, ParserType, FileSource |
|
|
|
|
|
# Upper bound on the number of files accepted by a single upload request;
# requests carrying more files than this are rejected before anything is stored.
MAXIMUM_OF_UPLOADING_FILES = 256
|
|
|
|
|
|
|
@manager.route('/<dataset_id>', methods=['POST'])
@login_required
def upload(dataset_id):
    """Upload the files sent in the ``file`` form field into a dataset.

    The request is validated first (at least one file, no more than
    ``MAXIMUM_OF_UPLOADING_FILES``, every file named, no remote ``http``
    names). Each file is then written to MinIO under the dataset's bucket,
    registered as a document, and linked into the user's knowledge-base
    folder.

    Args:
        dataset_id: Identifier of the target dataset, taken from the URL.

    Returns:
        The response built by ``construct_json_result``. On success ``data``
        is the list of created document records. Validation failures return
        ``ARGUMENT_ERROR`` or ``DATA_ERROR``. Exceptions raised while storing
        individual files are collected and returned together as
        ``SERVER_ERROR``.
    """
    file_objs = request.files.getlist('file')

    # An empty list covers both "no files in the request at all" and "files
    # sent under a different field name"; neither may be reported as success.
    if not file_objs:
        return construct_json_result(
            message='There is no file!', code=RetCode.ARGUMENT_ERROR)

    num_file_objs = len(file_objs)
    if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
        return construct_json_result(
            code=RetCode.DATA_ERROR,
            message=f"You try to upload {num_file_objs} files, "
                    f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")

    # Validate every file before anything is stored.
    for file_obj in file_objs:
        file_name = file_obj.filename

        if not file_name:
            return construct_json_result(
                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)

        # NOTE(review): this rejects any name that merely contains "http",
        # not only URLs. Kept as-is (message included) because API clients
        # may depend on the current behavior.
        if 'http' in file_name:
            return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")

        # Peek a single byte to detect an empty upload, then rewind. Without
        # the rewind the stream stays at its end and the read() in the storage
        # loop below would return b'', storing every document as empty.
        is_empty = file_obj.read(1) == b''
        file_obj.seek(0)
        if is_empty:
            warnings.warn(f"[WARNING]: The file {file_name} is empty.")

    exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
    if not exist:
        return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)

    # Make sure the user's folder hierarchy exists, then obtain the folder
    # that represents this dataset inside the knowledge-base root folder.
    root_folder = FileService.get_root_folder(current_user.id)
    parent_file_id = root_folder["id"]
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
    kb_root_folder = FileService.get_kb_folder(current_user.id)
    kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])

    err = []
    # A value of 0 (the default) disables the per-tenant document limit.
    max_file_num_per_user = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
    uploaded_docs_json = []

    for file in file_objs:
        try:
            # The limit check and the file-type check below abort the whole
            # request; nothing in this function rolls back files that were
            # already stored in earlier iterations.
            if max_file_num_per_user > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= max_file_num_per_user:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="Exceed the maximum file number of a free user!")

            # presumably yields a name not yet used by a document in this
            # dataset -- confirm against duplicate_name
            filename = duplicate_name(
                DocumentService.query,
                name=file.filename,
                kb_id=dataset.id)

            filetype = filename_type(filename)
            if filetype == FileType.OTHER.value:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="This type of file has not been supported yet!")

            # Append "_" until the object key is free in the dataset's bucket.
            location = filename
            while MINIO.obj_exist(dataset_id, location):
                location += "_"

            blob = file.read()
            MINIO.put(dataset_id, location, blob)

            doc = {
                "id": get_uuid(),
                "kb_id": dataset.id,
                "parser_id": dataset.parser_id,
                "parser_config": dataset.parser_config,
                "created_by": current_user.id,
                "type": filetype,
                "name": filename,
                "location": location,
                "size": len(blob),
                "thumbnail": thumbnail(filename, blob),
            }

            # Override the dataset's default parser for pictures and slides.
            # Compared via ``.value`` to match the FileType.OTHER check above.
            if doc["type"] == FileType.VISUAL.value:
                doc["parser_id"] = ParserType.PICTURE.value
            if re.search(r"\.(ppt|pptx|pages)$", filename):
                doc["parser_id"] = ParserType.PRESENTATION.value

            DocumentService.insert(doc)
            FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
            uploaded_docs_json.append(doc)
        except Exception as e:
            # Best effort: record the failure and continue with the next file.
            err.append(file.filename + ": " + str(e))

    if err:
        return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)

    return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
|
|
|
|
|
@manager.route('/<dataset_id>/<document_id>', methods=['DELETE'])
@login_required
def delete(document_id, dataset_id):
    """Delete a document from a dataset, including its stored file.

    Removes the document record, the knowledge-base file entry linked to it,
    the file-to-document mapping and finally the object in MinIO.

    Args:
        document_id: Identifier of the document to delete, taken from the URL.
        dataset_id: Identifier of the dataset the document must belong to,
            taken from the URL.

    Returns:
        The response built by ``construct_json_result``: ``data=True`` with
        ``SUCCESS`` when everything was removed, otherwise an error code with
        a message describing the failure. Any exception raised during the
        removal is reported as ``SERVER_ERROR`` with ``data=False``.
    """
    # Make sure the user's folder hierarchy exists before touching file records.
    root_folder = FileService.get_root_folder(current_user.id)
    parent_file_id = root_folder["id"]
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)

    try:
        exist, doc = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)

        tenant_id = DocumentService.get_tenant_id(document_id)
        if not tenant_id:
            return construct_json_result(message=f"You cannot delete this document {document_id} due to the authorization"
                                                 f" reason!", code=RetCode.AUTHENTICATION_ERROR)

        # Resolve the storage address first: it is needed after the database
        # rows are gone, and it tells us which dataset really owns the document.
        real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)

        if real_dataset_id != dataset_id:
            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)

        if not DocumentService.remove_document(doc, tenant_id):
            return construct_json_result(
                message="There was an error during the document removal process. Please check the status of the "
                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)

        file_to_doc = File2DocumentService.get_by_document_id(document_id)
        # A document may have no file mapping. Indexing an empty result would
        # raise after the document row is already gone and skip the MinIO
        # cleanup below, leaving the stored object orphaned.
        if file_to_doc:
            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])

        File2DocumentService.delete_by_document_id(document_id)

        MINIO.rm(dataset_id, location)
    except Exception as e:
        # Return straight from the handler: testing an accumulated message
        # string for truthiness would report an exception with an empty
        # message as a successful deletion.
        return construct_json_result(data=False, message=str(e), code=RetCode.SERVER_ERROR)

    return construct_json_result(data=True, code=RetCode.SUCCESS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|