Spaces:
Paused
Paused
Commit
·
b745365
1
Parent(s):
cc60679
file loading testing
Browse files
- app.py +4 -3
- helper_functions.py +12 -20
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import chainlit as cl
|
2 |
-
from helper_functions import process_file, load_documents_from_url,
|
3 |
import models
|
4 |
import agents
|
5 |
import asyncio
|
@@ -44,7 +44,7 @@ async def main(message: cl.Message):
|
|
44 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
45 |
|
46 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
47 |
-
|
48 |
except Exception as e:
|
49 |
await cl.Message(f"Error processing the document: {e}").send()
|
50 |
else:
|
@@ -85,7 +85,8 @@ async def handle_response(res):
|
|
85 |
await msg.send()
|
86 |
|
87 |
# load the file
|
88 |
-
|
|
|
89 |
await cl.Message(content="loaded docs").send()
|
90 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
91 |
await cl.Message(content="split docs").send()
|
|
|
1 |
import chainlit as cl
|
2 |
+
from helper_functions import process_file, load_documents_from_url, store_uploaded_file
|
3 |
import models
|
4 |
import agents
|
5 |
import asyncio
|
|
|
44 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
45 |
|
46 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
47 |
+
|
48 |
except Exception as e:
|
49 |
await cl.Message(f"Error processing the document: {e}").send()
|
50 |
else:
|
|
|
85 |
await msg.send()
|
86 |
|
87 |
# load the file
|
88 |
+
file_path = store_uploaded_file(file)
|
89 |
+
docs = await asyncio.to_thread(process_file, file_path)
|
90 |
await cl.Message(content="loaded docs").send()
|
91 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
92 |
await cl.Message(content="split docs").send()
|
helper_functions.py
CHANGED
@@ -8,32 +8,24 @@ from langchain_core.language_models import BaseLanguageModel
|
|
8 |
import os
|
9 |
import functools
|
10 |
import requests
|
|
|
11 |
|
12 |
-
def
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
# Determine the file type and load it accordingly
|
21 |
-
if uploaded_file.name.endswith(".pdf"):
|
22 |
# Load PDF with PyMuPDFLoader
|
23 |
-
loader = PyMuPDFLoader(
|
24 |
-
elif
|
25 |
# Load text file with TextLoader
|
26 |
-
loader = TextLoader(
|
27 |
else:
|
28 |
raise ValueError("Unsupported file format. Only PDF and TXT are supported.")
|
29 |
|
30 |
-
|
31 |
-
documents = loader.load()
|
32 |
-
|
33 |
-
# Clean up the temporary file
|
34 |
-
os.remove(temp_file_path)
|
35 |
-
|
36 |
-
return documents
|
37 |
|
38 |
def load_documents_from_url(url):
|
39 |
try:
|
|
|
8 |
import os
|
9 |
import functools
|
10 |
import requests
|
11 |
+
from chainlit.types import AskFileResponse
|
12 |
|
13 |
+
def store_uploaded_file(uploaded_file: "AskFileResponse"):
    """Persist an uploaded file under ``./tmp`` and return the path written.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``content`` (the payload written in binary mode —
            presumably ``bytes``; confirm against the chainlit version in use).

    Returns:
        The path of the stored file, formatted as ``./tmp/<file name>``.

    Raises:
        ValueError: If the upload's name does not contain a usable file name.
        OSError: If the directory or the file cannot be written.
    """
    # The name comes from the client, so keep only its final component:
    # "../../x" or "..\\..\\x" must not be able to escape the upload directory.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid uploaded file name: {uploaded_file.name!r}")

    # A fresh checkout/container has no ./tmp yet; create it on demand.
    os.makedirs("./tmp", exist_ok=True)

    file_path = f"./tmp/{safe_name}"
    # The context manager flushes and closes the handle before the path is
    # handed to a loader that re-opens the file.
    with open(file_path, "wb") as destination:
        destination.write(uploaded_file.content)
    return file_path
|
17 |
+
|
18 |
+
def process_file(file_path):
    """Load the document at ``file_path`` with a loader chosen by extension.

    ``.pdf`` files go through ``PyMuPDFLoader`` and ``.txt`` files through
    ``TextLoader``. The extension is matched case-insensitively.

    Args:
        file_path: Path of the file to load, as a string.

    Returns:
        Whatever the selected loader's ``load()`` returns (presumably a list
        of documents — confirm against the loader library).

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.txt``.
    """
    # Uploads frequently arrive as "REPORT.PDF" or "Notes.TXT"; compare on a
    # lower-cased copy but hand the loader the untouched path.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        # Load PDF with PyMuPDFLoader
        loader = PyMuPDFLoader(file_path)
    elif lowered_path.endswith(".txt"):
        # Load text file with TextLoader
        loader = TextLoader(file_path)
    else:
        raise ValueError("Unsupported file format. Only PDF and TXT are supported.")

    return loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def load_documents_from_url(url):
|
31 |
try:
|