Spaces:
Paused
Paused
Commit
·
0159ca5
1
Parent(s):
2389c43
url loading testing
Browse files
- app.py +5 -1
- helper_functions.py +3 -7
app.py
CHANGED
@@ -21,6 +21,7 @@ def rename(orig_author: str):
|
|
21 |
|
22 |
@cl.on_message
|
23 |
async def main(message: cl.Message):
|
|
|
24 |
if message.content.startswith("http://") or message.content.startswith("https://"):
|
25 |
message_type = "url"
|
26 |
else:
|
@@ -43,7 +44,7 @@ async def main(message: cl.Message):
|
|
43 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
44 |
|
45 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
46 |
-
|
47 |
except Exception as e:
|
48 |
await cl.Message(f"Error processing the document: {e}").send()
|
49 |
else:
|
@@ -85,13 +86,16 @@ async def handle_response(res):
|
|
85 |
|
86 |
# load the file
|
87 |
docs = await asyncio.to_thread(process_file, file)
|
|
|
88 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
|
|
89 |
for i, doc in enumerate(splits):
|
90 |
doc.metadata["user_upload_source"] = f"source_{i}"
|
91 |
print(f"Processing {len(docs)} text chunks")
|
92 |
|
93 |
# Add to the qdrant_store
|
94 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
|
|
95 |
|
96 |
msg.content = f"Processing `{file.name}` done. You can now ask questions!"
|
97 |
await msg.update()
|
|
|
21 |
|
22 |
@cl.on_message
|
23 |
async def main(message: cl.Message):
|
24 |
+
await cl.Message(f"Processing `{message.content}`", disable_human_feedback=True)
|
25 |
if message.content.startswith("http://") or message.content.startswith("https://"):
|
26 |
message_type = "url"
|
27 |
else:
|
|
|
44 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
45 |
|
46 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
47 |
+
|
48 |
except Exception as e:
|
49 |
await cl.Message(f"Error processing the document: {e}").send()
|
50 |
else:
|
|
|
86 |
|
87 |
# load the file
|
88 |
docs = await asyncio.to_thread(process_file, file)
|
89 |
+
await cl.Message(content="loaded docs").send()
|
90 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
91 |
+
await cl.Message(content="split docs").send()
|
92 |
for i, doc in enumerate(splits):
|
93 |
doc.metadata["user_upload_source"] = f"source_{i}"
|
94 |
print(f"Processing {len(docs)} text chunks")
|
95 |
|
96 |
# Add to the qdrant_store
|
97 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
98 |
+
await cl.Message(content="added to vs").send()
|
99 |
|
100 |
msg.content = f"Processing `{file.name}` done. You can now ask questions!"
|
101 |
await msg.update()
|
helper_functions.py
CHANGED
@@ -14,17 +14,13 @@ def process_file(file):
|
|
14 |
temp_file = "./"+file.path
|
15 |
with open(temp_file, "wb") as file:
|
16 |
file.write(file.content)
|
17 |
-
|
18 |
-
documents = []
|
19 |
if file.path.endswith(".pdf"):
|
20 |
loader = PyMuPDFLoader(temp_file)
|
21 |
-
|
22 |
-
documents.extend(docs)
|
23 |
else:
|
24 |
loader = TextLoader(temp_file)
|
25 |
-
|
26 |
-
documents.extend(docs)
|
27 |
-
return documents
|
28 |
|
29 |
def load_documents_from_url(url):
|
30 |
try:
|
|
|
14 |
temp_file = "./"+file.path
|
15 |
with open(temp_file, "wb") as file:
|
16 |
file.write(file.content)
|
17 |
+
|
|
|
18 |
if file.path.endswith(".pdf"):
|
19 |
loader = PyMuPDFLoader(temp_file)
|
20 |
+
return loader.load()
|
|
|
21 |
else:
|
22 |
loader = TextLoader(temp_file)
|
23 |
+
return loader.load()
|
|
|
|
|
24 |
|
25 |
def load_documents_from_url(url):
|
26 |
try:
|