File size: 2,647 Bytes
ee8a916
ce45214
278278b
3d9274d
ce45214
 
 
95da4bf
 
 
 
ce45214
 
 
 
95da4bf
 
ce45214
 
 
 
 
74bda08
 
ce45214
 
 
 
278278b
 
ce45214
 
 
 
 
ee8a916
 
9a8dfa4
ee8a916
 
 
 
 
 
9a8dfa4
ee8a916
 
 
 
 
 
 
3d9274d
 
9a8dfa4
ce45214
3d9274d
 
 
 
 
 
 
278278b
 
95da4bf
9a8dfa4
3d9274d
 
 
 
 
eabf8a3
9a8dfa4
3d9274d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
from .base import Base
from .chunk import Chunk
from typing import List


class Document(Base):
    """Client-side handle for one document that belongs to a dataset.

    The instance carries the document's metadata as plain attributes and
    offers helpers that call the ``/datasets/{dataset_id}/documents/{id}``
    endpoints through the HTTP helpers (``get``/``post``/``put``/``rm``)
    invoked on ``self`` (presumably provided by ``Base`` -- confirm there).
    """

    class ParserConfig(Base):
        """Parser configuration wrapper; construction is delegated to ``Base``."""

        def __init__(self, rag, res_dict):
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        """Set attribute defaults, then hand the recognised keys to ``Base``.

        Args:
            rag: Client object forwarded to ``Base.__init__``.
            res_dict: Mapping of document fields. Keys that do not match one
                of the default attributes below are ignored. The mapping
                itself is left untouched.
        """
        self.id = ""
        self.name = ""
        self.thumbnail = None
        self.dataset_id = None
        self.chunk_method = "naive"
        self.parser_config = {"pages": [[1, 1000000]]}
        self.source_type = "local"
        self.type = ""
        self.created_by = ""
        self.size = 0
        self.token_count = 0
        self.chunk_count = 0
        self.progress = 0.0
        self.progress_msg = ""
        self.process_begin_at = None
        self.process_duration = 0.0
        self.run = "0"
        self.status = "1"
        # Filter into a fresh dict instead of popping from ``res_dict`` so the
        # caller's mapping is not modified as a side effect of construction.
        known_fields = {
            key: value for key, value in res_dict.items() if key in self.__dict__
        }
        super().__init__(rag, known_fields)

    def update(self, update_message: dict):
        """Send ``update_message`` to the document endpoint via ``put``.

        Args:
            update_message: Payload passed unchanged as the request body.

        Raises:
            Exception: If the response's ``code`` is not ``0``; the exception
                carries the response's ``message`` (``None`` when absent).
        """
        res = self.put(f'/datasets/{self.dataset_id}/documents/{self.id}',
                       update_message)
        res = res.json()
        if res.get("code") != 0:
            # ``.get`` keeps a missing "message" from masking the API error
            # with a KeyError, matching the other methods of this class.
            raise Exception(res.get("message"))

    def download(self):
        """Fetch the document and return the raw response content.

        Returns:
            The ``content`` attribute of the response.

        Raises:
            Exception: If the body is a JSON object that looks like an error
                envelope (see the inline note below).
        """
        res = self.get(f"/datasets/{self.dataset_id}/documents/{self.id}")
        try:
            payload = res.json()
        except ValueError:
            # Not JSON at all, so this is file content. ValueError is the
            # base class of json.JSONDecodeError, so that case stays covered.
            return res.content
        # A stored document may itself be valid JSON, so a body that parses is
        # not automatically an error. NOTE(review): assumes the error envelope
        # is an object limited to the "code"/"message"/"data" keys with a
        # non-zero "code" -- the keys the other methods here read; confirm
        # against the server.
        looks_like_error = (
            isinstance(payload, dict)
            and set(payload) <= {"code", "message", "data"}
            and payload.get("code") not in (None, 0)
        )
        if looks_like_error:
            raise Exception(payload.get("message"))
        return res.content

    def list_chunks(self, offset=0, limit=30, keywords="", id: str = None):
        """Query the document's chunks endpoint and wrap each result in ``Chunk``.

        Args:
            offset: Sent as the ``offset`` query value.
            limit: Sent as the ``limit`` query value.
            keywords: Sent as the ``keywords`` query value.
            id: Sent as the ``id`` query value (presumably a chunk id filter
                -- confirm against the API).

        Returns:
            A list of ``Chunk`` objects built from ``data.chunks`` in the
            response; empty when that entry is missing or null.

        Raises:
            Exception: If the response's ``code`` is not ``0``.
        """
        params = {
            "document_id": self.id,
            "keywords": keywords,
            "offset": offset,
            "limit": limit,
            "id": id,
        }
        res = self.get(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', params)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
        # ``or []`` tolerates a missing/null "chunks" entry instead of failing
        # with a TypeError while iterating None.
        return [Chunk(self.rag, item) for item in res["data"].get("chunks") or []]

    def add_chunk(self, content: str, important_keywords: List[str] = None):
        """Post a new chunk for this document.

        Args:
            content: Sent as the ``content`` field.
            important_keywords: Sent as the ``important_keywords`` field.
                Omitting it (or passing ``None``) sends an empty list.

        Returns:
            A ``Chunk`` built from ``data.chunk`` in the response.

        Raises:
            Exception: If the response's ``code`` is not ``0``.
        """
        # A ``[]`` default would be one list shared by every call; create a
        # fresh one per call instead.
        if important_keywords is None:
            important_keywords = []
        res = self.post(
            f'/datasets/{self.dataset_id}/documents/{self.id}/chunks',
            {"content": content, "important_keywords": important_keywords},
        )
        res = res.json()
        if res.get("code") == 0:
            return Chunk(self.rag, res["data"].get("chunk"))
        raise Exception(res.get("message"))

    def delete_chunks(self, ids: List[str] = None):
        """Request deletion of chunks of this document.

        Args:
            ids: Sent as the ``ids`` field of the request body (``None`` is
                forwarded as-is).

        Raises:
            Exception: If the response's ``code`` is not ``0``.
        """
        # Leading "/" added so the path has the same shape as every other
        # endpoint built in this class.
        res = self.rm(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))