angry-meow committed on
Commit
0159ca5
·
1 Parent(s): 2389c43

url loading testing

Browse files
Files changed (2) hide show
  1. app.py +5 -1
  2. helper_functions.py +3 -7
app.py CHANGED
@@ -21,6 +21,7 @@ def rename(orig_author: str):
21
 
22
  @cl.on_message
23
  async def main(message: cl.Message):
 
24
  if message.content.startswith("http://") or message.content.startswith("https://"):
25
  message_type = "url"
26
  else:
@@ -43,7 +44,7 @@ async def main(message: cl.Message):
43
  await asyncio.to_thread(qdrant_store.add_documents, splits)
44
 
45
  await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
46
-
47
  except Exception as e:
48
  await cl.Message(f"Error processing the document: {e}").send()
49
  else:
@@ -85,13 +86,16 @@ async def handle_response(res):
85
 
86
  # load the file
87
  docs = await asyncio.to_thread(process_file, file)
 
88
  splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
 
89
  for i, doc in enumerate(splits):
90
  doc.metadata["user_upload_source"] = f"source_{i}"
91
  print(f"Processing {len(docs)} text chunks")
92
 
93
  # Add to the qdrant_store
94
  await asyncio.to_thread(qdrant_store.add_documents, splits)
 
95
 
96
  msg.content = f"Processing `{file.name}` done. You can now ask questions!"
97
  await msg.update()
 
21
 
22
  @cl.on_message
23
  async def main(message: cl.Message):
24
+ await cl.Message(f"Processing `{message.content}`", disable_human_feedback=True)
25
  if message.content.startswith("http://") or message.content.startswith("https://"):
26
  message_type = "url"
27
  else:
 
44
  await asyncio.to_thread(qdrant_store.add_documents, splits)
45
 
46
  await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
47
+
48
  except Exception as e:
49
  await cl.Message(f"Error processing the document: {e}").send()
50
  else:
 
86
 
87
  # load the file
88
  docs = await asyncio.to_thread(process_file, file)
89
+ await cl.Message(content="loaded docs").send()
90
  splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
91
+ await cl.Message(content="split docs").send()
92
  for i, doc in enumerate(splits):
93
  doc.metadata["user_upload_source"] = f"source_{i}"
94
  print(f"Processing {len(docs)} text chunks")
95
 
96
  # Add to the qdrant_store
97
  await asyncio.to_thread(qdrant_store.add_documents, splits)
98
+ await cl.Message(content="added to vs").send()
99
 
100
  msg.content = f"Processing `{file.name}` done. You can now ask questions!"
101
  await msg.update()
helper_functions.py CHANGED
@@ -14,17 +14,13 @@ def process_file(file):
14
  temp_file = "./"+file.path
15
  with open(temp_file, "wb") as file:
16
  file.write(file.content)
17
-
18
- documents = []
19
  if file.path.endswith(".pdf"):
20
  loader = PyMuPDFLoader(temp_file)
21
- docs = loader.load()
22
- documents.extend(docs)
23
  else:
24
  loader = TextLoader(temp_file)
25
- docs = loader.load()
26
- documents.extend(docs)
27
- return documents
28
 
29
  def load_documents_from_url(url):
30
  try:
 
14
  temp_file = "./"+file.path
15
  with open(temp_file, "wb") as file:
16
  file.write(file.content)
17
+
 
18
  if file.path.endswith(".pdf"):
19
  loader = PyMuPDFLoader(temp_file)
20
+ return loader.load()
 
21
  else:
22
  loader = TextLoader(temp_file)
23
+ return loader.load()
 
 
24
 
25
  def load_documents_from_url(url):
26
  try: