angry-meow committed on
Commit
b745365
·
1 Parent(s): cc60679

file loading testing

Browse files
Files changed (2) hide show
  1. app.py +4 -3
  2. helper_functions.py +12 -20
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import chainlit as cl
2
- from helper_functions import process_file, load_documents_from_url, add_to_qdrant
3
  import models
4
  import agents
5
  import asyncio
@@ -44,7 +44,7 @@ async def main(message: cl.Message):
44
  await asyncio.to_thread(qdrant_store.add_documents, splits)
45
 
46
  await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
47
-
48
  except Exception as e:
49
  await cl.Message(f"Error processing the document: {e}").send()
50
  else:
@@ -85,7 +85,8 @@ async def handle_response(res):
85
  await msg.send()
86
 
87
  # load the file
88
- docs = await asyncio.to_thread(process_file, file)
 
89
  await cl.Message(content="loaded docs").send()
90
  splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
91
  await cl.Message(content="split docs").send()
 
1
  import chainlit as cl
2
+ from helper_functions import process_file, load_documents_from_url, store_uploaded_file
3
  import models
4
  import agents
5
  import asyncio
 
44
  await asyncio.to_thread(qdrant_store.add_documents, splits)
45
 
46
  await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
47
+
48
  except Exception as e:
49
  await cl.Message(f"Error processing the document: {e}").send()
50
  else:
 
85
  await msg.send()
86
 
87
  # load the file
88
+ file_path = store_uploaded_file(file)
89
+ docs = await asyncio.to_thread(process_file, file_path)
90
  await cl.Message(content="loaded docs").send()
91
  splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
92
  await cl.Message(content="split docs").send()
helper_functions.py CHANGED
@@ -8,32 +8,24 @@ from langchain_core.language_models import BaseLanguageModel
8
  import os
9
  import functools
10
  import requests
 
11
 
12
- def process_file(uploaded_file):
13
- # Save the file temporarily to process it
14
- temp_file_path = f"/tmp/{uploaded_file.name}"
15
-
16
- with open(temp_file_path, "wb") as temp_file:
17
- # Write the uploaded file content to the temporary file
18
- temp_file.write(uploaded_file.read())
19
-
20
- # Determine the file type and load it accordingly
21
- if uploaded_file.name.endswith(".pdf"):
22
  # Load PDF with PyMuPDFLoader
23
- loader = PyMuPDFLoader(temp_file_path)
24
- elif uploaded_file.name.endswith(".txt"):
25
  # Load text file with TextLoader
26
- loader = TextLoader(temp_file_path)
27
  else:
28
  raise ValueError("Unsupported file format. Only PDF and TXT are supported.")
29
 
30
- # Load documents from the file
31
- documents = loader.load()
32
-
33
- # Clean up the temporary file
34
- os.remove(temp_file_path)
35
-
36
- return documents
37
 
38
  def load_documents_from_url(url):
39
  try:
 
8
  import os
9
  import functools
10
  import requests
11
+ from chainlit.types import AskFileResponse
12
 
13
def store_uploaded_file(uploaded_file: "AskFileResponse"):
    """Write an uploaded file's bytes under ``./tmp`` and return the path.

    Args:
        uploaded_file: Object exposing ``name`` (the file name) and
            ``content`` (the raw bytes to write).

    Returns:
        The path of the written file, in the form ``./tmp/<file name>``.

    Raises:
        ValueError: If no usable file name remains after stripping
            directory components from ``uploaded_file.name``.
        OSError: If the directory or the file cannot be written.
    """
    # The name comes from the uploader, so keep only its final component:
    # a name such as "../../etc/passwd" must not escape the upload directory.
    # Backslashes are normalised first so Windows-style names are handled too.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    # The relative upload directory may not exist on a fresh checkout.
    os.makedirs("./tmp", exist_ok=True)

    file_path = f"./tmp/{safe_name}"
    # Context manager guarantees the handle is flushed and closed before the
    # path is handed to a loader.
    with open(file_path, "wb") as out_file:
        out_file.write(uploaded_file.content)
    return file_path
17
+
18
def process_file(file_path):
    """Load a PDF or TXT file from disk using the matching loader.

    Args:
        file_path: Path of the file to load. The extension decides which
            loader is used and is matched case-insensitively.

    Returns:
        Whatever the selected loader's ``load()`` returns.

    Raises:
        ValueError: If the path ends in neither ``.pdf`` nor ``.txt``.
    """
    # Lower-case once so "REPORT.PDF" and "notes.Txt" are accepted as well;
    # every path the case-sensitive check accepted is still accepted.
    lowered = os.fspath(file_path).lower()

    if lowered.endswith(".pdf"):
        # Load PDF with PyMuPDFLoader
        loader = PyMuPDFLoader(file_path)
    elif lowered.endswith(".txt"):
        # Load text file with TextLoader
        loader = TextLoader(file_path)
    else:
        raise ValueError("Unsupported file format. Only PDF and TXT are supported.")

    return loader.load()
 
 
 
 
 
 
29
 
30
  def load_documents_from_url(url):
31
  try: