liuhua
liuhua
committed on
Commit
·
eabf8a3
1
Parent(s):
5b9e61c
Fix issues in API (#3008)
Browse files
### What problem does this PR solve?
Fix issues in API
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
Co-authored-by: liuhua <[email protected]>
- api/apps/sdk/dataset.py +16 -3
- api/apps/sdk/doc.py +38 -10
- api/http_api_reference.md +129 -188
- api/utils/api_utils.py +1 -1
- sdk/python/ragflow/modules/document.py +1 -1
api/apps/sdk/dataset.py
CHANGED
@@ -64,7 +64,12 @@ def create(tenant_id):
|
|
64 |
if not req.get("embedding_model"):
|
65 |
req['embedding_model'] = t.embd_id
|
66 |
else:
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
68 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
69 |
key_mapping = {
|
70 |
"chunk_num": "chunk_count",
|
@@ -133,6 +138,9 @@ def update(tenant_id,dataset_id):
|
|
133 |
return get_error_data_result(
|
134 |
retmsg="Can't change `tenant_id`.")
|
135 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
|
|
|
|
|
|
136 |
if "chunk_count" in req:
|
137 |
if req["chunk_count"] != kb.chunk_num:
|
138 |
return get_error_data_result(
|
@@ -153,10 +161,15 @@ def update(tenant_id,dataset_id):
|
|
153 |
if "embedding_model" in req:
|
154 |
if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
|
155 |
return get_error_data_result(
|
156 |
-
retmsg="If `chunk_count` is not 0, `
|
157 |
if not req.get("embedding_model"):
|
158 |
return get_error_data_result("`embedding_model` can't be empty")
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
160 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
161 |
req['embd_id'] = req.pop('embedding_model')
|
162 |
if "name" in req:
|
|
|
64 |
if not req.get("embedding_model"):
|
65 |
req['embedding_model'] = t.embd_id
|
66 |
else:
|
67 |
+
valid_embedding_models=["BAAI/bge-large-zh-v1.5","BAAI/bge-base-en-v1.5","BAAI/bge-large-en-v1.5","BAAI/bge-small-en-v1.5",
|
68 |
+
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
69 |
+
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
70 |
+
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
71 |
+
if not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model"))\
|
72 |
+
and req.get("embedding_model") not in valid_embedding_models:
|
73 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
74 |
key_mapping = {
|
75 |
"chunk_num": "chunk_count",
|
|
|
138 |
return get_error_data_result(
|
139 |
retmsg="Can't change `tenant_id`.")
|
140 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
141 |
+
if "parser_config" in req:
|
142 |
+
print(kb.parser_config,flush=True)
|
143 |
+
req["parser_config"]=kb.parser_config.update(req["parser_config"])
|
144 |
if "chunk_count" in req:
|
145 |
if req["chunk_count"] != kb.chunk_num:
|
146 |
return get_error_data_result(
|
|
|
161 |
if "embedding_model" in req:
|
162 |
if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
|
163 |
return get_error_data_result(
|
164 |
+
retmsg="If `chunk_count` is not 0, `embedding_model` is not changeable.")
|
165 |
if not req.get("embedding_model"):
|
166 |
return get_error_data_result("`embedding_model` can't be empty")
|
167 |
+
valid_embedding_models=["BAAI/bge-large-zh-v1.5","BAAI/bge-base-en-v1.5","BAAI/bge-large-en-v1.5","BAAI/bge-small-en-v1.5",
|
168 |
+
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
169 |
+
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
170 |
+
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
171 |
+
if not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model"))\
|
172 |
+
and req.get("embedding_model") not in valid_embedding_models:
|
173 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
174 |
req['embd_id'] = req.pop('embedding_model')
|
175 |
if "name" in req:
|
api/apps/sdk/doc.py
CHANGED
@@ -163,9 +163,6 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|
163 |
doc.process_duation * -1)
|
164 |
if not e:
|
165 |
return get_error_data_result(retmsg="Document not found!")
|
166 |
-
tenant_id = DocumentService.get_tenant_id(req["id"])
|
167 |
-
if not tenant_id:
|
168 |
-
return get_error_data_result(retmsg="Tenant not found!")
|
169 |
ELASTICSEARCH.deleteByQuery(
|
170 |
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
|
171 |
|
@@ -245,14 +242,22 @@ def delete(tenant_id,dataset_id):
|
|
245 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
246 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
247 |
req = request.json
|
248 |
-
if not req
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
root_folder = FileService.get_root_folder(tenant_id)
|
252 |
pf_id = root_folder["id"]
|
253 |
FileService.init_knowledgebase_docs(pf_id, tenant_id)
|
254 |
errors = ""
|
255 |
-
for doc_id in
|
256 |
try:
|
257 |
e, doc = DocumentService.get_by_id(doc_id)
|
258 |
if not e:
|
@@ -290,8 +295,11 @@ def parse(tenant_id,dataset_id):
|
|
290 |
if not req.get("document_ids"):
|
291 |
return get_error_data_result("`document_ids` is required")
|
292 |
for id in req["document_ids"]:
|
293 |
-
|
|
|
294 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
|
|
|
|
295 |
info = {"run": "1", "progress": 0}
|
296 |
info["progress_msg"] = ""
|
297 |
info["chunk_num"] = 0
|
@@ -349,7 +357,27 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
349 |
"doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
|
350 |
}
|
351 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
origin_chunks = []
|
354 |
sign = 0
|
355 |
for id in sres.ids:
|
@@ -388,7 +416,7 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
388 |
"content_with_weight": "content",
|
389 |
"doc_id": "document_id",
|
390 |
"important_kwd": "important_keywords",
|
391 |
-
"img_id": "image_id"
|
392 |
}
|
393 |
renamed_chunk = {}
|
394 |
for key, value in chunk.items():
|
|
|
163 |
doc.process_duation * -1)
|
164 |
if not e:
|
165 |
return get_error_data_result(retmsg="Document not found!")
|
|
|
|
|
|
|
166 |
ELASTICSEARCH.deleteByQuery(
|
167 |
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
|
168 |
|
|
|
242 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
243 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
244 |
req = request.json
|
245 |
+
if not req:
|
246 |
+
doc_ids=None
|
247 |
+
else:
|
248 |
+
doc_ids=req.get("ids")
|
249 |
+
if not doc_ids:
|
250 |
+
doc_list = []
|
251 |
+
docs=DocumentService.query(kb_id=dataset_id)
|
252 |
+
for doc in docs:
|
253 |
+
doc_list.append(doc.id)
|
254 |
+
else:
|
255 |
+
doc_list=doc_ids
|
256 |
root_folder = FileService.get_root_folder(tenant_id)
|
257 |
pf_id = root_folder["id"]
|
258 |
FileService.init_knowledgebase_docs(pf_id, tenant_id)
|
259 |
errors = ""
|
260 |
+
for doc_id in doc_list:
|
261 |
try:
|
262 |
e, doc = DocumentService.get_by_id(doc_id)
|
263 |
if not e:
|
|
|
295 |
if not req.get("document_ids"):
|
296 |
return get_error_data_result("`document_ids` is required")
|
297 |
for id in req["document_ids"]:
|
298 |
+
doc = DocumentService.query(id=id,kb_id=dataset_id)
|
299 |
+
if not doc:
|
300 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
301 |
+
if doc[0].progress != 0.0:
|
302 |
+
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
303 |
info = {"run": "1", "progress": 0}
|
304 |
info["progress_msg"] = ""
|
305 |
info["chunk_num"] = 0
|
|
|
357 |
"doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
|
358 |
}
|
359 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
360 |
+
key_mapping = {
|
361 |
+
"chunk_num": "chunk_count",
|
362 |
+
"kb_id": "dataset_id",
|
363 |
+
"token_num": "token_count",
|
364 |
+
"parser_id": "chunk_method"
|
365 |
+
}
|
366 |
+
run_mapping = {
|
367 |
+
"0": "UNSTART",
|
368 |
+
"1": "RUNNING",
|
369 |
+
"2": "CANCEL",
|
370 |
+
"3": "DONE",
|
371 |
+
"4": "FAIL"
|
372 |
+
}
|
373 |
+
doc=doc.to_dict()
|
374 |
+
renamed_doc = {}
|
375 |
+
for key, value in doc.items():
|
376 |
+
if key == "run":
|
377 |
+
renamed_doc["run"] = run_mapping.get(str(value))
|
378 |
+
new_key = key_mapping.get(key, key)
|
379 |
+
renamed_doc[new_key] = value
|
380 |
+
res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
|
381 |
origin_chunks = []
|
382 |
sign = 0
|
383 |
for id in sres.ids:
|
|
|
416 |
"content_with_weight": "content",
|
417 |
"doc_id": "document_id",
|
418 |
"important_kwd": "important_keywords",
|
419 |
+
"img_id": "image_id"
|
420 |
}
|
421 |
renamed_chunk = {}
|
422 |
for key, value in chunk.items():
|
api/http_api_reference.md
CHANGED
@@ -31,7 +31,7 @@ Creates a dataset.
|
|
31 |
- `"language"`: `string`
|
32 |
- `"embedding_model"`: `string`
|
33 |
- `"permission"`: `string`
|
34 |
-
- `"
|
35 |
- `"parser_config"`: `Dataset.ParserConfig`
|
36 |
|
37 |
#### Request example
|
@@ -41,11 +41,9 @@ curl --request POST \
|
|
41 |
--url http://{address}/api/v1/dataset \
|
42 |
--header 'Content-Type: application/json' \
|
43 |
--header 'Authorization: Bearer {YOUR_API_KEY}' \
|
44 |
-
--data '
|
45 |
-
|
46 |
-
|
47 |
-
"chunk_method": "naive"
|
48 |
-
}'
|
49 |
```
|
50 |
|
51 |
#### Request parameters
|
@@ -109,31 +107,32 @@ Success:
|
|
109 |
"data": {
|
110 |
"avatar": null,
|
111 |
"chunk_count": 0,
|
112 |
-
"
|
113 |
-
"
|
|
|
114 |
"created_by": "69736c5e723611efb51b0242ac120007",
|
115 |
"description": null,
|
116 |
"document_count": 0,
|
117 |
"embedding_model": "BAAI/bge-large-zh-v1.5",
|
118 |
-
"id": "
|
119 |
"language": "English",
|
120 |
"name": "test_1",
|
121 |
-
"parse_method": "naive",
|
122 |
"parser_config": {
|
123 |
-
"
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
129 |
},
|
130 |
"permission": "me",
|
131 |
"similarity_threshold": 0.2,
|
132 |
"status": "1",
|
133 |
"tenant_id": "69736c5e723611efb51b0242ac120007",
|
134 |
"token_num": 0,
|
135 |
-
"update_date": "Thu,
|
136 |
-
"update_time":
|
137 |
"vector_similarity_weight": 0.3
|
138 |
}
|
139 |
}
|
@@ -229,9 +228,7 @@ curl --request PUT \
|
|
229 |
--header 'Authorization: Bearer {YOUR_API_KEY}' \
|
230 |
--data '
|
231 |
{
|
232 |
-
"name": "
|
233 |
-
"embedding_model": "BAAI/bge-zh-v1.5",
|
234 |
-
"chunk_method": "naive"
|
235 |
}'
|
236 |
```
|
237 |
|
@@ -336,7 +333,7 @@ Success:
|
|
336 |
"id": "6e211ee0723611efa10a0242ac120007",
|
337 |
"language": "English",
|
338 |
"name": "mysql",
|
339 |
-
"
|
340 |
"parser_config": {
|
341 |
"chunk_token_num": 8192,
|
342 |
"delimiter": "\\n!?;。;!?",
|
@@ -418,7 +415,30 @@ Success:
|
|
418 |
|
419 |
```json
|
420 |
{
|
421 |
-
"code": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
422 |
}
|
423 |
```
|
424 |
|
@@ -552,7 +572,7 @@ curl --request GET \
|
|
552 |
Success:
|
553 |
|
554 |
```text
|
555 |
-
|
556 |
```
|
557 |
|
558 |
Failure:
|
@@ -953,40 +973,54 @@ Success:
|
|
953 |
{
|
954 |
"code": 0,
|
955 |
"data": {
|
956 |
-
"chunks": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
957 |
"doc": {
|
958 |
-
"
|
959 |
-
"
|
960 |
-
"
|
|
|
961 |
"created_by": "69736c5e723611efb51b0242ac120007",
|
962 |
-
"
|
963 |
-
"
|
964 |
-
"location": "
|
965 |
-
"name": "
|
966 |
"parser_config": {
|
967 |
-
"
|
968 |
-
|
969 |
-
|
970 |
-
|
971 |
-
|
972 |
-
|
|
|
973 |
},
|
974 |
-
"
|
975 |
-
"
|
976 |
-
"
|
977 |
-
"
|
978 |
-
"
|
979 |
-
"
|
980 |
-
"size": 24,
|
981 |
"source_type": "local",
|
982 |
"status": "1",
|
983 |
-
"thumbnail":
|
984 |
-
"
|
985 |
"type": "doc",
|
986 |
-
"update_date": "
|
987 |
-
"update_time":
|
988 |
},
|
989 |
-
"total":
|
990 |
}
|
991 |
}
|
992 |
```
|
@@ -1287,29 +1321,7 @@ curl --request POST \
|
|
1287 |
--header 'Content-Type: application/json' \
|
1288 |
--header 'Authorization: Bearer {YOUR_API_KEY}' \
|
1289 |
--data '{
|
1290 |
-
|
1291 |
-
{
|
1292 |
-
"avatar": null,
|
1293 |
-
"chunk_count": 0,
|
1294 |
-
"description": null,
|
1295 |
-
"document_count": 0,
|
1296 |
-
"embedding_model": "",
|
1297 |
-
"id": "0b2cbc8c877f11ef89070242ac120005",
|
1298 |
-
"language": "English",
|
1299 |
-
"name": "Test_assistant",
|
1300 |
-
"parse_method": "naive",
|
1301 |
-
"parser_config": {
|
1302 |
-
"pages": [
|
1303 |
-
[
|
1304 |
-
1,
|
1305 |
-
1000000
|
1306 |
-
]
|
1307 |
-
]
|
1308 |
-
},
|
1309 |
-
"permission": "me",
|
1310 |
-
"tenant_id": "4fb0cd625f9311efba4a0242ac120006"
|
1311 |
-
}
|
1312 |
-
],
|
1313 |
"name":"new_chat_1"
|
1314 |
}'
|
1315 |
```
|
@@ -1363,49 +1375,29 @@ Success:
|
|
1363 |
"code": 0,
|
1364 |
"data": {
|
1365 |
"avatar": "",
|
1366 |
-
"create_date": "
|
1367 |
-
"create_time":
|
|
|
|
|
|
|
1368 |
"description": "A helpful Assistant",
|
1369 |
"do_refer": "1",
|
1370 |
-
"id": "
|
1371 |
-
"knowledgebases": [
|
1372 |
-
{
|
1373 |
-
"avatar": null,
|
1374 |
-
"chunk_count": 0,
|
1375 |
-
"description": null,
|
1376 |
-
"document_count": 0,
|
1377 |
-
"embedding_model": "",
|
1378 |
-
"id": "0b2cbc8c877f11ef89070242ac120005",
|
1379 |
-
"language": "English",
|
1380 |
-
"name": "Test_assistant",
|
1381 |
-
"parse_method": "naive",
|
1382 |
-
"parser_config": {
|
1383 |
-
"pages": [
|
1384 |
-
[
|
1385 |
-
1,
|
1386 |
-
1000000
|
1387 |
-
]
|
1388 |
-
]
|
1389 |
-
},
|
1390 |
-
"permission": "me",
|
1391 |
-
"tenant_id": "4fb0cd625f9311efba4a0242ac120006"
|
1392 |
-
}
|
1393 |
-
],
|
1394 |
"language": "English",
|
1395 |
"llm": {
|
1396 |
"frequency_penalty": 0.7,
|
1397 |
"max_tokens": 512,
|
1398 |
-
"model_name": "
|
1399 |
"presence_penalty": 0.4,
|
1400 |
"temperature": 0.1,
|
1401 |
"top_p": 0.3
|
1402 |
},
|
1403 |
-
"name": "
|
1404 |
"prompt": {
|
1405 |
-
"empty_response": "Sorry!
|
1406 |
"keywords_similarity_weight": 0.3,
|
1407 |
-
"opener": "
|
1408 |
-
"prompt": "
|
1409 |
"rerank_model": "",
|
1410 |
"similarity_threshold": 0.2,
|
1411 |
"top_n": 6,
|
@@ -1420,8 +1412,8 @@ Success:
|
|
1420 |
"status": "1",
|
1421 |
"tenant_id": "69736c5e723611efb51b0242ac120007",
|
1422 |
"top_k": 1024,
|
1423 |
-
"update_date": "
|
1424 |
-
"update_time":
|
1425 |
}
|
1426 |
}
|
1427 |
```
|
@@ -1636,56 +1628,27 @@ Success:
|
|
1636 |
"data": [
|
1637 |
{
|
1638 |
"avatar": "",
|
1639 |
-
"create_date": "Fri,
|
1640 |
-
"create_time":
|
1641 |
"description": "A helpful Assistant",
|
1642 |
"do_refer": "1",
|
1643 |
-
"id": "
|
1644 |
-
"
|
1645 |
-
{
|
1646 |
-
"avatar": "",
|
1647 |
-
"chunk_num": 0,
|
1648 |
-
"create_date": "Fri, 11 Oct 2024 03:15:18 GMT",
|
1649 |
-
"create_time": 1728616518986,
|
1650 |
-
"created_by": "69736c5e723611efb51b0242ac120007",
|
1651 |
-
"description": "",
|
1652 |
-
"doc_num": 0,
|
1653 |
-
"embd_id": "BAAI/bge-large-zh-v1.5",
|
1654 |
-
"id": "0b2cbc8c877f11ef89070242ac120005",
|
1655 |
-
"language": "English",
|
1656 |
-
"name": "test_delete_chat",
|
1657 |
-
"parser_config": {
|
1658 |
-
"chunk_token_count": 128,
|
1659 |
-
"delimiter": "\n!?。;!?",
|
1660 |
-
"layout_recognize": true,
|
1661 |
-
"task_page_size": 12
|
1662 |
-
},
|
1663 |
-
"parser_id": "naive",
|
1664 |
-
"permission": "me",
|
1665 |
-
"similarity_threshold": 0.2,
|
1666 |
-
"status": "1",
|
1667 |
-
"tenant_id": "69736c5e723611efb51b0242ac120007",
|
1668 |
-
"token_num": 0,
|
1669 |
-
"update_date": "Fri, 11 Oct 2024 04:01:31 GMT",
|
1670 |
-
"update_time": 1728619291228,
|
1671 |
-
"vector_similarity_weight": 0.3
|
1672 |
-
}
|
1673 |
-
],
|
1674 |
"language": "English",
|
1675 |
"llm": {
|
1676 |
"frequency_penalty": 0.7,
|
1677 |
"max_tokens": 512,
|
1678 |
-
"model_name": "
|
1679 |
"presence_penalty": 0.4,
|
1680 |
"temperature": 0.1,
|
1681 |
"top_p": 0.3
|
1682 |
},
|
1683 |
-
"name": "
|
1684 |
"prompt": {
|
1685 |
-
"empty_response": "Sorry!
|
1686 |
"keywords_similarity_weight": 0.3,
|
1687 |
-
"opener": "
|
1688 |
-
"prompt": "
|
1689 |
"rerank_model": "",
|
1690 |
"similarity_threshold": 0.2,
|
1691 |
"top_n": 6,
|
@@ -1700,8 +1663,8 @@ Success:
|
|
1700 |
"status": "1",
|
1701 |
"tenant_id": "69736c5e723611efb51b0242ac120007",
|
1702 |
"top_k": 1024,
|
1703 |
-
"update_date": "Fri,
|
1704 |
-
"update_time":
|
1705 |
}
|
1706 |
]
|
1707 |
}
|
@@ -2035,78 +1998,55 @@ Success:
|
|
2035 |
data: {
|
2036 |
"code": 0,
|
2037 |
"data": {
|
2038 |
-
"answer": "
|
2039 |
-
"reference": {},
|
2040 |
-
"audio_binary": null,
|
2041 |
-
"id": "31153052-7bac-4741-a513-ed07d853f29e"
|
2042 |
-
}
|
2043 |
-
}
|
2044 |
-
|
2045 |
-
data: {
|
2046 |
-
"code": 0,
|
2047 |
-
"data": {
|
2048 |
-
"answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助",
|
2049 |
"reference": {},
|
2050 |
"audio_binary": null,
|
2051 |
-
"id": "
|
|
|
2052 |
}
|
2053 |
}
|
2054 |
|
2055 |
data: {
|
2056 |
"code": 0,
|
2057 |
"data": {
|
2058 |
-
"answer": "
|
2059 |
"reference": {},
|
2060 |
"audio_binary": null,
|
2061 |
-
"id": "
|
|
|
2062 |
}
|
2063 |
}
|
2064 |
|
2065 |
data: {
|
2066 |
"code": 0,
|
2067 |
"data": {
|
2068 |
-
"answer": "
|
2069 |
"reference": {},
|
2070 |
"audio_binary": null,
|
2071 |
-
"id": "
|
|
|
2072 |
}
|
2073 |
}
|
2074 |
|
2075 |
data: {
|
2076 |
"code": 0,
|
2077 |
"data": {
|
2078 |
-
"answer": "
|
2079 |
"reference": {
|
2080 |
-
"total":
|
2081 |
"chunks": [
|
2082 |
-
{
|
2083 |
-
"chunk_id": "9d87f9d70a0d8a7565694a81fd4c5d5f",
|
2084 |
-
"content_ltks": "当所有知识库内容都与问题无关时 ,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n以下是知识库:\r\n{knowledg}\r\n以上是知识库\r\n\"\"\"\r\n 1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n总结\r\n通过上面的介绍,可以对开源的 ragflow有了一个大致的了解,与前面的有道qanyth整体流程还是比较类似的。 ",
|
2085 |
-
"content_with_weight": "当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。",
|
2086 |
-
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
2087 |
-
"docnm_kwd": "1.txt",
|
2088 |
-
"kb_id": "c7ee74067a2c11efb21c0242ac120006",
|
2089 |
-
"important_kwd": [],
|
2090 |
-
"img_id": "",
|
2091 |
-
"similarity": 0.38337178633282265,
|
2092 |
-
"vector_similarity": 0.3321336754679629,
|
2093 |
-
"term_similarity": 0.4053309767034769,
|
2094 |
-
"positions": [
|
2095 |
-
""
|
2096 |
-
]
|
2097 |
-
},
|
2098 |
{
|
2099 |
"chunk_id": "895d34de762e674b43e8613c6fb54c6d",
|
2100 |
-
"content_ltks": "\r\n\r\n
|
2101 |
-
"content_with_weight": "\r\n\r\n
|
2102 |
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
2103 |
"docnm_kwd": "1.txt",
|
2104 |
"kb_id": "c7ee74067a2c11efb21c0242ac120006",
|
2105 |
"important_kwd": [],
|
2106 |
"img_id": "",
|
2107 |
-
"similarity": 0.
|
2108 |
-
"vector_similarity": 0.
|
2109 |
-
"term_similarity": 0.
|
2110 |
"positions": [
|
2111 |
""
|
2112 |
]
|
@@ -2116,12 +2056,13 @@ data: {
|
|
2116 |
{
|
2117 |
"doc_name": "1.txt",
|
2118 |
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
2119 |
-
"count":
|
2120 |
}
|
2121 |
]
|
2122 |
},
|
2123 |
-
"prompt": "
|
2124 |
-
"id": "
|
|
|
2125 |
}
|
2126 |
}
|
2127 |
|
|
|
31 |
- `"language"`: `string`
|
32 |
- `"embedding_model"`: `string`
|
33 |
- `"permission"`: `string`
|
34 |
+
- `"chunk_method"`: `string`
|
35 |
- `"parser_config"`: `Dataset.ParserConfig`
|
36 |
|
37 |
#### Request example
|
|
|
41 |
--url http://{address}/api/v1/dataset \
|
42 |
--header 'Content-Type: application/json' \
|
43 |
--header 'Authorization: Bearer {YOUR_API_KEY}' \
|
44 |
+
--data '{
|
45 |
+
"name": "test_1"
|
46 |
+
}'
|
|
|
|
|
47 |
```
|
48 |
|
49 |
#### Request parameters
|
|
|
107 |
"data": {
|
108 |
"avatar": null,
|
109 |
"chunk_count": 0,
|
110 |
+
"chunk_method": "naive",
|
111 |
+
"create_date": "Thu, 24 Oct 2024 09:14:07 GMT",
|
112 |
+
"create_time": 1729761247434,
|
113 |
"created_by": "69736c5e723611efb51b0242ac120007",
|
114 |
"description": null,
|
115 |
"document_count": 0,
|
116 |
"embedding_model": "BAAI/bge-large-zh-v1.5",
|
117 |
+
"id": "527fa74891e811ef9c650242ac120006",
|
118 |
"language": "English",
|
119 |
"name": "test_1",
|
|
|
120 |
"parser_config": {
|
121 |
+
"chunk_token_num": 128,
|
122 |
+
"delimiter": "\\n!?;。;!?",
|
123 |
+
"html4excel": false,
|
124 |
+
"layout_recognize": true,
|
125 |
+
"raptor": {
|
126 |
+
"user_raptor": false
|
127 |
+
}
|
128 |
},
|
129 |
"permission": "me",
|
130 |
"similarity_threshold": 0.2,
|
131 |
"status": "1",
|
132 |
"tenant_id": "69736c5e723611efb51b0242ac120007",
|
133 |
"token_num": 0,
|
134 |
+
"update_date": "Thu, 24 Oct 2024 09:14:07 GMT",
|
135 |
+
"update_time": 1729761247434,
|
136 |
"vector_similarity_weight": 0.3
|
137 |
}
|
138 |
}
|
|
|
228 |
--header 'Authorization: Bearer {YOUR_API_KEY}' \
|
229 |
--data '
|
230 |
{
|
231 |
+
"name": "updated_dataset",
|
|
|
|
|
232 |
}'
|
233 |
```
|
234 |
|
|
|
333 |
"id": "6e211ee0723611efa10a0242ac120007",
|
334 |
"language": "English",
|
335 |
"name": "mysql",
|
336 |
+
"chunk_method": "knowledge_graph",
|
337 |
"parser_config": {
|
338 |
"chunk_token_num": 8192,
|
339 |
"delimiter": "\\n!?;。;!?",
|
|
|
415 |
|
416 |
```json
|
417 |
{
|
418 |
+
"code": 0,
|
419 |
+
"data": [
|
420 |
+
{
|
421 |
+
"chunk_method": "naive",
|
422 |
+
"created_by": "69736c5e723611efb51b0242ac120007",
|
423 |
+
"dataset_id": "527fa74891e811ef9c650242ac120006",
|
424 |
+
"id": "b330ec2e91ec11efbc510242ac120004",
|
425 |
+
"location": "1.txt",
|
426 |
+
"name": "1.txt",
|
427 |
+
"parser_config": {
|
428 |
+
"chunk_token_num": 128,
|
429 |
+
"delimiter": "\\n!?;。;!?",
|
430 |
+
"html4excel": false,
|
431 |
+
"layout_recognize": true,
|
432 |
+
"raptor": {
|
433 |
+
"user_raptor": false
|
434 |
+
}
|
435 |
+
},
|
436 |
+
"run": "UNSTART",
|
437 |
+
"size": 17966,
|
438 |
+
"thumbnail": "",
|
439 |
+
"type": "doc"
|
440 |
+
}
|
441 |
+
]
|
442 |
}
|
443 |
```
|
444 |
|
|
|
572 |
Success:
|
573 |
|
574 |
```text
|
575 |
+
This is a test to verify the file download functionality.
|
576 |
```
|
577 |
|
578 |
Failure:
|
|
|
973 |
{
|
974 |
"code": 0,
|
975 |
"data": {
|
976 |
+
"chunks": [
|
977 |
+
{
|
978 |
+
"available_int": 1,
|
979 |
+
"content": "This is a test content.",
|
980 |
+
"docnm_kwd": "1.txt",
|
981 |
+
"document_id": "b330ec2e91ec11efbc510242ac120004",
|
982 |
+
"id": "b48c170e90f70af998485c1065490726",
|
983 |
+
"image_id": "",
|
984 |
+
"important_keywords": "",
|
985 |
+
"positions": [
|
986 |
+
""
|
987 |
+
]
|
988 |
+
}
|
989 |
+
],
|
990 |
"doc": {
|
991 |
+
"chunk_count": 1,
|
992 |
+
"chunk_method": "naive",
|
993 |
+
"create_date": "Thu, 24 Oct 2024 09:45:27 GMT",
|
994 |
+
"create_time": 1729763127646,
|
995 |
"created_by": "69736c5e723611efb51b0242ac120007",
|
996 |
+
"dataset_id": "527fa74891e811ef9c650242ac120006",
|
997 |
+
"id": "b330ec2e91ec11efbc510242ac120004",
|
998 |
+
"location": "1.txt",
|
999 |
+
"name": "1.txt",
|
1000 |
"parser_config": {
|
1001 |
+
"chunk_token_num": 128,
|
1002 |
+
"delimiter": "\\n!?;。;!?",
|
1003 |
+
"html4excel": false,
|
1004 |
+
"layout_recognize": true,
|
1005 |
+
"raptor": {
|
1006 |
+
"user_raptor": false
|
1007 |
+
}
|
1008 |
},
|
1009 |
+
"process_begin_at": "Thu, 24 Oct 2024 09:56:44 GMT",
|
1010 |
+
"process_duation": 0.54213,
|
1011 |
+
"progress": 0.0,
|
1012 |
+
"progress_msg": "Task dispatched...",
|
1013 |
+
"run": "2",
|
1014 |
+
"size": 17966,
|
|
|
1015 |
"source_type": "local",
|
1016 |
"status": "1",
|
1017 |
+
"thumbnail": "",
|
1018 |
+
"token_count": 8,
|
1019 |
"type": "doc",
|
1020 |
+
"update_date": "Thu, 24 Oct 2024 11:03:15 GMT",
|
1021 |
+
"update_time": 1729767795721
|
1022 |
},
|
1023 |
+
"total": 1
|
1024 |
}
|
1025 |
}
|
1026 |
```
|
|
|
1321 |
--header 'Content-Type: application/json' \
|
1322 |
--header 'Authorization: Bearer {YOUR_API_KEY}' \
|
1323 |
--data '{
|
1324 |
+
"dataset_ids": ["0b2cbc8c877f11ef89070242ac120005"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1325 |
"name":"new_chat_1"
|
1326 |
}'
|
1327 |
```
|
|
|
1375 |
"code": 0,
|
1376 |
"data": {
|
1377 |
"avatar": "",
|
1378 |
+
"create_date": "Thu, 24 Oct 2024 11:18:29 GMT",
|
1379 |
+
"create_time": 1729768709023,
|
1380 |
+
"dataset_ids": [
|
1381 |
+
"527fa74891e811ef9c650242ac120006"
|
1382 |
+
],
|
1383 |
"description": "A helpful Assistant",
|
1384 |
"do_refer": "1",
|
1385 |
+
"id": "b1f2f15691f911ef81180242ac120003",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1386 |
"language": "English",
|
1387 |
"llm": {
|
1388 |
"frequency_penalty": 0.7,
|
1389 |
"max_tokens": 512,
|
1390 |
+
"model_name": "qwen-plus@Tongyi-Qianwen",
|
1391 |
"presence_penalty": 0.4,
|
1392 |
"temperature": 0.1,
|
1393 |
"top_p": 0.3
|
1394 |
},
|
1395 |
+
"name": "12234",
|
1396 |
"prompt": {
|
1397 |
+
"empty_response": "Sorry! No relevant content was found in the knowledge base!",
|
1398 |
"keywords_similarity_weight": 0.3,
|
1399 |
+
"opener": "Hi! I'm your assistant, what can I do for you?",
|
1400 |
+
"prompt": "You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.",
|
1401 |
"rerank_model": "",
|
1402 |
"similarity_threshold": 0.2,
|
1403 |
"top_n": 6,
|
|
|
1412 |
"status": "1",
|
1413 |
"tenant_id": "69736c5e723611efb51b0242ac120007",
|
1414 |
"top_k": 1024,
|
1415 |
+
"update_date": "Thu, 24 Oct 2024 11:18:29 GMT",
|
1416 |
+
"update_time": 1729768709023
|
1417 |
}
|
1418 |
}
|
1419 |
```
|
|
|
1628 |
"data": [
|
1629 |
{
|
1630 |
"avatar": "",
|
1631 |
+
"create_date": "Fri, 18 Oct 2024 06:20:06 GMT",
|
1632 |
+
"create_time": 1729232406637,
|
1633 |
"description": "A helpful Assistant",
|
1634 |
"do_refer": "1",
|
1635 |
+
"id": "04d0d8e28d1911efa3630242ac120006",
|
1636 |
+
"dataset_ids": ["527fa74891e811ef9c650242ac120006"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1637 |
"language": "English",
|
1638 |
"llm": {
|
1639 |
"frequency_penalty": 0.7,
|
1640 |
"max_tokens": 512,
|
1641 |
+
"model_name": "qwen-plus@Tongyi-Qianwen",
|
1642 |
"presence_penalty": 0.4,
|
1643 |
"temperature": 0.1,
|
1644 |
"top_p": 0.3
|
1645 |
},
|
1646 |
+
"name": "13243",
|
1647 |
"prompt": {
|
1648 |
+
"empty_response": "Sorry! No relevant content was found in the knowledge base!",
|
1649 |
"keywords_similarity_weight": 0.3,
|
1650 |
+
"opener": "Hi! I'm your assistant, what can I do for you?",
|
1651 |
+
"prompt": "You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.",
|
1652 |
"rerank_model": "",
|
1653 |
"similarity_threshold": 0.2,
|
1654 |
"top_n": 6,
|
|
|
1663 |
"status": "1",
|
1664 |
"tenant_id": "69736c5e723611efb51b0242ac120007",
|
1665 |
"top_k": 1024,
|
1666 |
+
"update_date": "Fri, 18 Oct 2024 06:20:06 GMT",
|
1667 |
+
"update_time": 1729232406638
|
1668 |
}
|
1669 |
]
|
1670 |
}
|
|
|
1998 |
data: {
|
1999 |
"code": 0,
|
2000 |
"data": {
|
2001 |
+
"answer": "I am an intelligent assistant designed to help you with your inquiries. I can provide",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2002 |
"reference": {},
|
2003 |
"audio_binary": null,
|
2004 |
+
"id": "d8e5ebb6-6b52-4fd1-bd02-35b52ba3acaa",
|
2005 |
+
"session_id": "e14344d08d1a11efb6210242ac120004"
|
2006 |
}
|
2007 |
}
|
2008 |
|
2009 |
data: {
|
2010 |
"code": 0,
|
2011 |
"data": {
|
2012 |
+
"answer": "I am an intelligent assistant designed to help you with your inquiries. I can provide information, answer questions, and assist with tasks based on the knowledge available to me",
|
2013 |
"reference": {},
|
2014 |
"audio_binary": null,
|
2015 |
+
"id": "d8e5ebb6-6b52-4fd1-bd02-35b52ba3acaa",
|
2016 |
+
"session_id": "e14344d08d1a11efb6210242ac120004"
|
2017 |
}
|
2018 |
}
|
2019 |
|
2020 |
data: {
|
2021 |
"code": 0,
|
2022 |
"data": {
|
2023 |
+
"answer": "I am an intelligent assistant designed to help you with your inquiries. I can provide information, answer questions, and assist with tasks based on the knowledge available to me. How can I assist you today?",
|
2024 |
"reference": {},
|
2025 |
"audio_binary": null,
|
2026 |
+
"id": "d8e5ebb6-6b52-4fd1-bd02-35b52ba3acaa",
|
2027 |
+
"session_id": "e14344d08d1a11efb6210242ac120004"
|
2028 |
}
|
2029 |
}
|
2030 |
|
2031 |
data: {
|
2032 |
"code": 0,
|
2033 |
"data": {
|
2034 |
+
"answer": "I am an intelligent assistant designed to help you with your inquiries. I can provide information, answer questions, and assist with tasks based on the knowledge available to me ##0$$. How can I assist you today?",
|
2035 |
"reference": {
|
2036 |
+
"total": 8,
|
2037 |
"chunks": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2038 |
{
|
2039 |
"chunk_id": "895d34de762e674b43e8613c6fb54c6d",
|
2040 |
+
"content_ltks": "xxxx\r\n\r\n\"\"\"\r\nyou are an intellig assistant. pleas summar the content of the knowledg base to answer the question. pleas list thedata in the knowledg base and answer in detail. when all knowledg base content is irrelev to the question , your answer must includ the sentenc\"the answer you are lookfor isnot found in the knowledg base!\" answer needto consid chat history.\r\n here is the knowledg base:\r\n{ knowledg}\r\nthe abov is the knowledg base.\r\n\"\"\"\r\n1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\nxxxx ",
|
2041 |
+
"content_with_weight": "xxxx\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\nxxxx\r\n\r\n\"\"\"\r\nxxxx",
|
2042 |
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
2043 |
"docnm_kwd": "1.txt",
|
2044 |
"kb_id": "c7ee74067a2c11efb21c0242ac120006",
|
2045 |
"important_kwd": [],
|
2046 |
"img_id": "",
|
2047 |
+
"similarity": 0.4442746624416507,
|
2048 |
+
"vector_similarity": 0.3843936320913369,
|
2049 |
+
"term_similarity": 0.4699379611632138,
|
2050 |
"positions": [
|
2051 |
""
|
2052 |
]
|
|
|
2056 |
{
|
2057 |
"doc_name": "1.txt",
|
2058 |
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
2059 |
+
"count": 1
|
2060 |
}
|
2061 |
]
|
2062 |
},
|
2063 |
+
"prompt": "xxxx\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\nxxxx\n\n### Query:\nwho are you,please answer me in English\n\n### Elapsed\n - Retrieval: 332.2 ms\n - LLM: 2972.1 ms",
|
2064 |
+
"id": "d8e5ebb6-6b52-4fd1-bd02-35b52ba3acaa",
|
2065 |
+
"session_id": "e14344d08d1a11efb6210242ac120004"
|
2066 |
}
|
2067 |
}
|
2068 |
|
api/utils/api_utils.py
CHANGED
@@ -337,7 +337,7 @@ def valid(permission,valid_permission,language,valid_language,chunk_method,valid
|
|
337 |
|
338 |
def valid_parameter(parameter,valid_values):
|
339 |
if parameter and parameter not in valid_values:
|
340 |
-
return get_error_data_result(f"{parameter} is not in {valid_values}")
|
341 |
|
342 |
def get_parser_config(chunk_method,parser_config):
|
343 |
if parser_config:
|
|
|
337 |
|
338 |
def valid_parameter(parameter,valid_values):
|
339 |
if parameter and parameter not in valid_values:
|
340 |
+
return get_error_data_result(f"`{parameter}` is not in {valid_values}")
|
341 |
|
342 |
def get_parser_config(chunk_method,parser_config):
|
343 |
if parser_config:
|
sdk/python/ragflow/modules/document.py
CHANGED
@@ -70,7 +70,7 @@ class Document(Base):
|
|
70 |
return Chunk(self.rag,res["data"].get("chunk"))
|
71 |
raise Exception(res.get("message"))
|
72 |
|
73 |
-
def delete_chunks(self,ids:List[str]):
|
74 |
res = self.rm(f"dataset/{self.dataset_id}/document/{self.id}/chunk",{"ids":ids})
|
75 |
res = res.json()
|
76 |
if res.get("code")!=0:
|
|
|
70 |
return Chunk(self.rag,res["data"].get("chunk"))
|
71 |
raise Exception(res.get("message"))
|
72 |
|
73 |
+
def delete_chunks(self,ids:List[str] = None):
|
74 |
res = self.rm(f"dataset/{self.dataset_id}/document/{self.id}/chunk",{"ids":ids})
|
75 |
res = res.json()
|
76 |
if res.get("code")!=0:
|