File size: 2,647 Bytes
ee8a916 ce45214 278278b 3d9274d ce45214 95da4bf ce45214 95da4bf ce45214 74bda08 ce45214 278278b ce45214 ee8a916 9a8dfa4 ee8a916 9a8dfa4 ee8a916 3d9274d 9a8dfa4 ce45214 3d9274d 278278b 95da4bf 9a8dfa4 3d9274d eabf8a3 9a8dfa4 3d9274d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import json
from .base import Base
from .chunk import Chunk
from typing import List
class Document(Base):
class ParserConfig(Base):
def __init__(self, rag, res_dict):
super().__init__(rag, res_dict)
def __init__(self, rag, res_dict):
self.id = ""
self.name = ""
self.thumbnail = None
self.dataset_id = None
self.chunk_method = "naive"
self.parser_config = {"pages": [[1, 1000000]]}
self.source_type = "local"
self.type = ""
self.created_by = ""
self.size = 0
self.token_count = 0
self.chunk_count = 0
self.progress = 0.0
self.progress_msg = ""
self.process_begin_at = None
self.process_duration = 0.0
self.run = "0"
self.status = "1"
for k in list(res_dict.keys()):
if k not in self.__dict__:
res_dict.pop(k)
super().__init__(rag, res_dict)
def update(self, update_message: dict):
res = self.put(f'/datasets/{self.dataset_id}/documents/{self.id}',
update_message)
res = res.json()
if res.get("code") != 0:
raise Exception(res["message"])
def download(self):
res = self.get(f"/datasets/{self.dataset_id}/documents/{self.id}")
try:
res = res.json()
raise Exception(res.get("message"))
except json.JSONDecodeError:
return res.content
def list_chunks(self,offset=0, limit=30, keywords="", id:str=None):
data={"document_id": self.id,"keywords": keywords,"offset":offset,"limit":limit,"id":id}
res = self.get(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', data)
res = res.json()
if res.get("code") == 0:
chunks=[]
for data in res["data"].get("chunks"):
chunk = Chunk(self.rag,data)
chunks.append(chunk)
return chunks
raise Exception(res.get("message"))
def add_chunk(self, content: str,important_keywords:List[str]=[]):
res = self.post(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', {"content":content,"important_keywords":important_keywords})
res = res.json()
if res.get("code") == 0:
return Chunk(self.rag,res["data"].get("chunk"))
raise Exception(res.get("message"))
def delete_chunks(self,ids:List[str] = None):
res = self.rm(f"datasets/{self.dataset_id}/documents/{self.id}/chunks",{"ids":ids})
res = res.json()
if res.get("code")!=0:
raise Exception(res.get("message")) |