File size: 5,518 Bytes
ce45214 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
from ragflow import RAGFlow, DataSet, Document
from common import API_KEY, HOST_ADDRESS
from test_sdkbase import TestSdk
class TestDocument(TestSdk):
    """Integration tests for document operations exposed by the RAGFlow SDK.

    Covers uploading, fetching, updating, downloading, listing and deleting
    documents. Every test builds its own ``RAGFlow`` client from ``API_KEY``
    and ``HOST_ADDRESS``.

    NOTE(review): the get/update/download tests look up "TestDocument.txt",
    which is the name uploaded by ``test_upload_document_with_success``;
    they presumably rely on that document already existing on the server.
    """

    def test_upload_document_with_success(self):
        """
        Test ingesting a document into a dataset with success.
        """
        # Initialize RAGFlow instance
        rag = RAGFlow(API_KEY, HOST_ADDRESS)

        # Step 1: Create a new dataset
        ds = rag.create_dataset(name="God")

        # Ensure dataset creation was successful
        assert isinstance(ds, DataSet), f"Failed to create dataset, error: {ds}"
        assert ds.name == "God", "Dataset name does not match."

        # Step 2: Create a new document
        # The blob is the actual file content or a placeholder in this case
        name = "TestDocument.txt"
        blob = b"Sample document content for ingestion test."
        res = rag.create_document(ds, name=name, blob=blob)

        # Ensure document ingestion was successful
        assert res is True, f"Failed to create document, error: {res}"

    def test_get_detail_document_with_success(self):
        """
        Test getting a document's detail with success.
        """
        rag = RAGFlow(API_KEY, HOST_ADDRESS)
        doc = rag.get_document(name="TestDocument.txt")

        assert isinstance(doc, Document), f"Failed to get document, error: {doc}."
        assert doc.name == "TestDocument.txt", "Name does not match"

    def test_update_document_with_success(self):
        """
        Test updating a document with success.
        """
        rag = RAGFlow(API_KEY, HOST_ADDRESS)
        doc = rag.get_document(name="TestDocument.txt")

        # Guard first: anything other than a Document is a retrieval failure.
        assert isinstance(doc, Document), f"Failed to get document, error: {doc}"

        doc.parser_method = "manual"
        res = doc.save()
        assert res is True, f"Failed to update document, error: {res}"

    def test_download_document_with_success(self):
        """
        Test downloading a document with success.

        The downloaded content is written to ``ragflow.txt`` in the current
        working directory.
        """
        # Initialize RAGFlow instance
        rag = RAGFlow(API_KEY, HOST_ADDRESS)

        # Retrieve a document
        doc = rag.get_document(name="TestDocument.txt")

        # Guard first: anything other than a Document is a retrieval failure.
        assert isinstance(doc, Document), f"Failed to get document, error: {doc}"

        # Download the document content and save it to a file. Errors are
        # deliberately not caught here so that a failure is reported with
        # its original exception type and traceback.
        with open("ragflow.txt", "wb+") as file:
            file.write(doc.download())

        # Print the document object for debugging
        print(doc)

    def test_list_all_documents_in_dataset_with_success(self):
        """
        Test listing all documents in a dataset with success.
        """
        # Initialize RAGFlow instance
        rag = RAGFlow(API_KEY, HOST_ADDRESS)

        # Step 1: Create a new dataset
        ds = rag.create_dataset(name="God2")

        # Ensure dataset creation was successful
        assert isinstance(ds, DataSet), f"Failed to create dataset, error: {ds}"
        assert ds.name == "God2", "Dataset name does not match."

        # Step 2: Create the documents to be listed
        # The blob is the actual file content or a placeholder in this case
        uploads = (
            ("Test Document111.txt", b"Sample document content for ingestion test111."),
            ("Test Document222.txt", b"Sample document content for ingestion test222."),
        )
        for name, blob in uploads:
            res = rag.create_document(ds, name=name, blob=blob)
            # Same success check as the upload test, so a failed upload is
            # reported here rather than surfacing as a confusing listing.
            assert res is True, f"Failed to create document, error: {res}"

        # Step 3: Every listed entry must be a Document
        for d in ds.list_docs(keywords="test", offset=0, limit=12):
            assert isinstance(d, Document)
            print(d)

    def test_delete_documents_in_dataset_with_success(self):
        """
        Test deleting documents from a dataset with success.
        """
        # Initialize RAGFlow instance
        rag = RAGFlow(API_KEY, HOST_ADDRESS)

        # Step 1: Create a new dataset
        ds = rag.create_dataset(name="God3")

        # Ensure dataset creation was successful
        assert isinstance(ds, DataSet), f"Failed to create dataset, error: {ds}"
        assert ds.name == "God3", "Dataset name does not match."

        # Step 2: Create the documents
        # Read the on-disk fixture through a context manager so the file
        # handle is closed deterministically.
        path = 'test_data/test.txt'
        with open(path, "rb") as fixture:
            file_blob = fixture.read()

        uploads = (
            ('test.txt', file_blob),
            ("Test Document333.txt", b"Sample document content for ingestion test333."),
            ("Test Document444.txt", b"Sample document content for ingestion test444."),
        )
        for name, blob in uploads:
            res = rag.create_document(ds, name=name, blob=blob)
            assert res is True, f"Failed to create document, error: {res}"

        # Step 3: Delete every document matching the keyword
        for d in ds.list_docs(keywords="document", offset=0, limit=12):
            assert isinstance(d, Document)
            d.delete()
            print(d)

        # Step 4: Re-query with the SAME keyword used for deletion; querying
        # an unrelated keyword would pass even if nothing had been deleted.
        remaining_docs = ds.list_docs(keywords="document", offset=0, limit=12)
        assert len(remaining_docs) == 0, "Documents were not properly deleted."
|