File size: 3,319 Bytes
fa82d94 ee8a916 ce45214 278278b ce45214 95da4bf ce45214 95da4bf ce45214 74bda08 ce45214 278278b ce45214 ee8a916 9a8dfa4 ee8a916 9a8dfa4 ee8a916 fa82d94 9a8dfa4 ce45214 3d9274d fa82d94 3d9274d fa82d94 3d9274d 278278b 2d7e5db fa82d94 3d9274d fa82d94 3d9274d fa82d94 3d9274d fa82d94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
from .base import Base
from .chunk import Chunk
class Document(Base):
    """A document stored in a RAGFlow dataset.

    Wraps the HTTP endpoints under ``/datasets/{dataset_id}/documents/{id}``
    and exposes metadata update, raw download, and chunk CRUD operations.
    All methods raise ``Exception`` with the server-reported message when the
    API responds with a non-zero ``code``.
    """

    class ParserConfig(Base):
        """Thin wrapper holding a document's parser configuration."""

        def __init__(self, rag, res_dict):
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        # Default values for every recognized attribute; keys in res_dict
        # that do not match one of these names are ignored.
        self.id = ""
        self.name = ""
        self.thumbnail = None
        self.dataset_id = None
        self.chunk_method = "naive"
        self.parser_config = {"pages": [[1, 1000000]]}
        self.source_type = "local"
        self.type = ""
        self.created_by = ""
        self.size = 0
        self.token_count = 0
        self.chunk_count = 0
        self.progress = 0.0
        self.progress_msg = ""
        self.process_begin_at = None
        self.process_duration = 0.0
        self.run = "0"
        self.status = "1"
        # Filter into a fresh dict instead of popping unrecognized keys out
        # of the caller's res_dict (the original mutated it in place).
        known = {k: v for k, v in res_dict.items() if k in self.__dict__}
        super().__init__(rag, known)

    def update(self, update_message: dict):
        """Push metadata changes for this document to the server.

        Args:
            update_message: fields to update, as accepted by the API.

        Raises:
            Exception: if the API returns a non-zero code.
        """
        res = self.put(f'/datasets/{self.dataset_id}/documents/{self.id}',
                       update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def download(self):
        """Download the raw document content.

        The endpoint returns the file body on success; errors arrive as a
        JSON payload instead, in which case an Exception is raised.

        Returns:
            bytes: the raw file content.

        Raises:
            Exception: if the server responded with a JSON error payload.
        """
        res = self.get(f"/datasets/{self.dataset_id}/documents/{self.id}")
        try:
            # Keep the try body minimal: only the JSON decode can legitimately
            # fail here (the original also raised its error inside the try).
            payload = res.json()
        except json.JSONDecodeError:
            # Not JSON -> this is the binary file content itself.
            return res.content
        raise Exception(payload.get("message"))

    def list_chunks(self, page=1, page_size=30, keywords=""):
        """List this document's chunks, paginated.

        Args:
            page: 1-based page number.
            page_size: number of chunks per page.
            keywords: optional keyword filter.

        Returns:
            list[Chunk]: the chunks on the requested page.

        Raises:
            Exception: if the API returns a non-zero code.
        """
        # Use a distinct name for the request params so the response items
        # don't shadow it (the original reused `data` for both).
        params = {"keywords": keywords, "page": page, "page_size": page_size}
        res = self.get(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', params)
        res = res.json()
        if res.get("code") == 0:
            return [Chunk(self.rag, item) for item in res["data"].get("chunks")]
        raise Exception(res.get("message"))

    def add_chunk(self, content: str, important_keywords: list[str] | None = None, questions: list[str] | None = None):
        """Create a new chunk under this document.

        Args:
            content: chunk text.
            important_keywords: optional keywords to attach; defaults to [].
            questions: optional questions to attach; defaults to [].

        Returns:
            Chunk: the newly created chunk.

        Raises:
            Exception: if the API returns a non-zero code.
        """
        # None sentinels instead of mutable [] defaults; the server still
        # receives empty lists when the arguments are omitted.
        payload = {
            "content": content,
            "important_keywords": important_keywords if important_keywords is not None else [],
            "questions": questions if questions is not None else [],
        }
        res = self.post(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', payload)
        res = res.json()
        if res.get("code") == 0:
            return Chunk(self.rag, res["data"].get("chunk"))
        raise Exception(res.get("message"))

    def delete_chunks(self, ids: list[str] | None = None):
        """Delete chunks of this document by id.

        Args:
            ids: chunk ids to delete; ``None`` sends ``chunk_ids: null``
                (presumably "delete all" — server semantics, verify there).

        Raises:
            Exception: if the API returns a non-zero code.
        """
        res = self.rm(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", {"chunk_ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
|