anasmkh committed
Commit 6863650 · verified · 1 Parent(s): fdd2048

Update app.py

Files changed (1)
  1. app.py +48 -28
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import shutil
+import time
 import gradio as gr
 import qdrant_client
 from getpass import getpass
@@ -33,13 +34,14 @@ client = None
 vector_store = None
 storage_context = None
 
-# Use a persistent folder to keep uploaded files.
+# Define a persistent collection name.
+collection_name = "paper"
+
+# Use a persistent folder to store uploaded files.
 upload_dir = "uploaded_files"
 if not os.path.exists(upload_dir):
     os.makedirs(upload_dir)
-
-# A set to track which files have already been processed.
-processed_files = set()
+# We no longer clear the folder so previously uploaded files are retained.
 
 # -------------------------------------------------------
 # Function to process uploaded files and update the index.
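The exists-check before os.makedirs in this hunk works, but it can race if two processes start at once. A minimal equivalent in one call (standard library only, no assumptions beyond Python 3):

    import os

    # exist_ok=True makes the call a no-op when the folder already exists.
    os.makedirs("uploaded_files", exist_ok=True)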
@@ -47,45 +49,66 @@ processed_files = set()
 def process_upload(files):
     """
     Accepts a list of uploaded file paths, saves them to a persistent folder,
-    loads only new documents, and builds (or updates) the vector index and chat engine.
+    loads new documents, and builds or updates the vector index and chat engine.
     """
-    global client, vector_store, storage_context, index, query_engine, memory, chat_engine, processed_files
+    global client, vector_store, storage_context, index, query_engine, memory, chat_engine
 
+    # Copy files into the upload directory if not already present.
     new_file_paths = []
-    # Loop over each uploaded file.
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
-        # If the file is not already in our folder, copy it.
-        if file_name not in processed_files:
-            if not os.path.exists(dest):
-                shutil.copy(file_path, dest)
+        if not os.path.exists(dest):
+            shutil.copy(file_path, dest)
             new_file_paths.append(dest)
-            processed_files.add(file_name)
 
+    # If no new files are uploaded, notify the user.
     if not new_file_paths:
         return "No new documents to add."
 
     # Load only the new documents.
     new_documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()
 
-    # If this is the first upload, build the index from scratch.
+    # Initialize a persistent Qdrant client.
+    client = qdrant_client.QdrantClient(
+        path="./qdrant_db",
+        prefer_grpc=True
+    )
+
+    # Ensure the collection exists.
+    from qdrant_client.http import models
+    existing_collections = {col.name for col in client.get_collections().collections}
+    if collection_name not in existing_collections:
+        client.create_collection(
+            collection_name=collection_name,
+            vectors_config=models.VectorParams(
+                size=1536,  # text-embedding-ada-002 produces 1536-dimensional vectors.
+                distance=models.Distance.COSINE
+            )
+        )
+        # Wait briefly for the collection creation to complete.
+        time.sleep(1)
+
+    # Initialize (or re-use) the vector store.
+    vector_store = QdrantVectorStore(
+        collection_name=collection_name,
+        client=client,
+        enable_hybrid=True,
+        batch_size=20,
+    )
+
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
+
+    # Build the index if it doesn't exist; otherwise, update it.
     if index is None:
-        # (Here we use an in-memory Qdrant client. Change ":memory:" to a persistent path if needed.)
-        client = qdrant_client.QdrantClient(location=":memory:")
-        vector_store = QdrantVectorStore(
-            collection_name="paper",
-            client=client,
-            enable_hybrid=True,
-            batch_size=20,
+        index = VectorStoreIndex.from_documents(
+            SimpleDirectoryReader(upload_dir).load_data(),
+            storage_context=storage_context
         )
-        storage_context = StorageContext.from_defaults(vector_store=vector_store)
-        index = VectorStoreIndex.from_documents(new_documents, storage_context=storage_context)
     else:
-        # Otherwise, insert the new documents into the existing index.
         index.insert_documents(new_documents)
 
-    # Reinitialize query and chat engines so they use the updated index.
+    # Reinitialize query and chat engines to reflect updates.
     query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
     chat_engine = index.as_chat_engine(
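The heart of this hunk is the switch from an in-memory Qdrant client to one persisted under ./qdrant_db, with the collection created only when missing. As a standalone sketch of that step, assuming a qdrant-client version that provides collection_exists() (older versions need the get_collections() scan used above):

    import qdrant_client
    from qdrant_client.http import models

    client = qdrant_client.QdrantClient(path="./qdrant_db")  # local on-disk mode

    if not client.collection_exists("paper"):
        client.create_collection(
            collection_name="paper",
            vectors_config=models.VectorParams(
                size=1536,  # dimensionality of text-embedding-ada-002 embeddings
                distance=models.Distance.COSINE,
            ),
        )

Two caveats, both dependent on installed versions: enable_hybrid=True in QdrantVectorStore generally needs a sparse-embedding backend (e.g. the fastembed package) installed, and if the installed llama-index does not expose index.insert_documents(), the per-document form is the safer fallback:

    # Fallback assuming only the per-document insert() API is available.
    for doc in new_documents:
        index.insert(doc)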
@@ -104,15 +127,12 @@ def process_upload(files):
 # -------------------------------------------------------
 def chat_with_ai(user_input, chat_history):
     global chat_engine
-    # Check if the chat engine is initialized.
     if chat_engine is None:
         return chat_history, "Please upload documents first."
 
     response = chat_engine.chat(user_input)
     references = response.source_nodes
     ref = []
-
-    # Extract file names from the source nodes (if available)
     for node in references:
         file_name = node.metadata.get('file_name')
         if file_name and file_name not in ref:
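The loop above collects unique source file names in first-seen order; the same result in one pass with dict.fromkeys (a sketch, with response being the chat_engine.chat() result as in the hunk):

    names = (n.metadata.get("file_name") for n in response.source_nodes)
    ref = list(dict.fromkeys(n for n in names if n))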
@@ -135,9 +155,9 @@ def gradio_interface():
     with gr.Blocks() as demo:
         gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
 
+        # Use Tabs to separate the file upload and chat interfaces.
         with gr.Tab("Upload Documents"):
             gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
-            # The file upload widget: we specify allowed file types.
             file_upload = gr.File(
                 label="Upload Files",
                 file_count="multiple",
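For context, a self-contained sketch of the tabbed upload wiring this hunk adjusts, with a stub standing in for app.py's real process_upload (the file_types argument and the .upload() event are assumed from recent Gradio releases):

    import gradio as gr

    def process_upload(files):
        # Stub in place of app.py's real indexing function.
        return f"Received {len(files or [])} file(s)."

    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
        with gr.Tab("Upload Documents"):
            file_upload = gr.File(
                label="Upload Files",
                file_count="multiple",
                file_types=[".pdf", ".xlsx", ".csv", ".doc", ".docx", ".txt"],
            )
            status = gr.Textbox(label="Status", interactive=False)
            file_upload.upload(process_upload, inputs=file_upload, outputs=status)

    demo.launch()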