anasmkh committed on
Commit
95989dc
·
verified ·
1 Parent(s): 02a57c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -62
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
  import shutil
3
- import time
4
  import gradio as gr
5
  import qdrant_client
6
  from getpass import getpass
@@ -34,66 +33,37 @@ client = None
34
  vector_store = None
35
  storage_context = None
36
 
37
- # Define a persistent collection name.
38
- collection_name = "paper"
39
-
40
- # Use a persistent folder to store uploaded files.
41
- upload_dir = "uploaded_files"
42
- if not os.path.exists(upload_dir):
43
- os.makedirs(upload_dir)
44
- # We do not clear the folder to keep previously uploaded files.
45
-
46
  # -------------------------------------------------------
47
- # Function to process uploaded files and update the index.
48
  # -------------------------------------------------------
49
  def process_upload(files):
50
  """
51
- Accepts a list of uploaded file paths, saves them to a persistent folder,
52
- loads new documents, and builds or updates the vector index and chat engine.
53
  """
54
- global client, vector_store, storage_context, index, query_engine, memory, chat_engine
55
-
56
- # Copy files into the upload directory if not already present.
57
- new_file_paths = []
 
 
 
 
 
58
  for file_path in files:
59
  file_name = os.path.basename(file_path)
60
  dest = os.path.join(upload_dir, file_name)
61
- if not os.path.exists(dest):
62
- shutil.copy(file_path, dest)
63
- new_file_paths.append(dest)
64
 
65
- # If no new files are uploaded, notify the user.
66
- if not new_file_paths:
67
- return "No new documents to add."
68
-
69
- # Load only the new documents.
70
- new_documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()
71
-
72
- # Initialize a persistent Qdrant client.
73
- client = qdrant_client.QdrantClient(
74
- path="./qdrant_db",
75
- prefer_grpc=True
76
- )
77
 
78
- # Ensure the collection exists.
79
- from qdrant_client.http import models
80
- existing_collections = {col.name for col in client.get_collections().collections}
81
- if collection_name not in existing_collections:
82
- client.create_collection(
83
- collection_name=collection_name,
84
- vectors_config={
85
- "text-dense": models.VectorParams(
86
- size=1536, # text-embedding-ada-002 produces 1536-dimensional vectors.
87
- distance=models.Distance.COSINE
88
- )
89
- }
90
- )
91
- # Wait briefly for the collection creation to complete.
92
- time.sleep(1)
93
 
94
- # Initialize (or re-use) the vector store.
95
  vector_store = QdrantVectorStore(
96
- collection_name=collection_name,
97
  client=client,
98
  enable_hybrid=True,
99
  batch_size=20,
@@ -101,19 +71,12 @@ def process_upload(files):
101
 
102
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
103
 
104
- # Build the index if it doesn't exist; otherwise, update it.
105
- if index is None:
106
- # Load all documents from the persistent folder.
107
- index = VectorStoreIndex.from_documents(
108
- SimpleDirectoryReader(upload_dir).load_data(),
109
- storage_context=storage_context
110
- )
111
- else:
112
- index.insert_documents(new_documents)
113
 
114
- # Reinitialize query and chat engines to reflect updates.
115
  query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
 
116
  memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
 
117
  chat_engine = index.as_chat_engine(
118
  chat_mode="context",
119
  memory=memory,
@@ -123,26 +86,32 @@ def process_upload(files):
123
  ),
124
  )
125
 
126
- return "Documents uploaded and index updated successfully!"
127
 
128
  # -------------------------------------------------------
129
  # Chat function that uses the built chat engine.
130
  # -------------------------------------------------------
131
  def chat_with_ai(user_input, chat_history):
132
  global chat_engine
 
133
  if chat_engine is None:
134
  return chat_history, "Please upload documents first."
135
 
136
  response = chat_engine.chat(user_input)
137
  references = response.source_nodes
138
- ref = []
 
 
139
  for node in references:
140
  file_name = node.metadata.get('file_name')
141
  if file_name and file_name not in ref:
142
  ref.append(file_name)
143
 
144
  complete_response = str(response) + "\n\n"
145
- chat_history.append((user_input, complete_response))
 
 
 
146
  return chat_history, ""
147
 
148
  # -------------------------------------------------------
@@ -161,6 +130,7 @@ def gradio_interface():
161
  # Use Tabs to separate the file upload and chat interfaces.
162
  with gr.Tab("Upload Documents"):
163
  gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
 
164
  file_upload = gr.File(
165
  label="Upload Files",
166
  file_count="multiple",
 
1
  import os
2
  import shutil
 
3
  import gradio as gr
4
  import qdrant_client
5
  from getpass import getpass
 
33
  vector_store = None
34
  storage_context = None
35
 
 
 
 
 
 
 
 
 
 
36
  # -------------------------------------------------------
37
+ # Function to process uploaded files and build the index.
38
  # -------------------------------------------------------
39
  def process_upload(files):
40
  """
41
+ Accepts a list of uploaded file paths, saves them to a local folder,
42
+ loads them as documents, and builds the vector index and chat engine.
43
  """
44
+ upload_dir = "uploaded_files"
45
+ if not os.path.exists(upload_dir):
46
+ os.makedirs(upload_dir)
47
+ else:
48
+ # Clear any existing files in the folder.
49
+ for f in os.listdir(upload_dir):
50
+ os.remove(os.path.join(upload_dir, f))
51
+
52
+ # 'files' is a list of file paths (Gradio's File component with type="file")
53
  for file_path in files:
54
  file_name = os.path.basename(file_path)
55
  dest = os.path.join(upload_dir, file_name)
56
+ shutil.copy(file_path, dest)
 
 
57
 
58
+ # Load documents from the saved folder.
59
+ documents = SimpleDirectoryReader(upload_dir).load_data()
 
 
 
 
 
 
 
 
 
 
60
 
61
+ # Build the index and chat engine using Qdrant as the vector store.
62
+ global client, vector_store, storage_context, index, query_engine, memory, chat_engine
63
+ client = qdrant_client.QdrantClient(location=":memory:")
 
 
 
 
 
 
 
 
 
 
 
 
64
 
 
65
  vector_store = QdrantVectorStore(
66
+ collection_name="paper",
67
  client=client,
68
  enable_hybrid=True,
69
  batch_size=20,
 
71
 
72
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
73
 
74
+ index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 
 
 
 
 
 
 
 
75
 
 
76
  query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
77
+
78
  memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
79
+
80
  chat_engine = index.as_chat_engine(
81
  chat_mode="context",
82
  memory=memory,
 
86
  ),
87
  )
88
 
89
+ return "Documents uploaded and index built successfully!"
90
 
91
  # -------------------------------------------------------
92
  # Chat function that uses the built chat engine.
93
  # -------------------------------------------------------
94
  def chat_with_ai(user_input, chat_history):
95
  global chat_engine
96
+ # Check if the chat engine is initialized.
97
  if chat_engine is None:
98
  return chat_history, "Please upload documents first."
99
 
100
  response = chat_engine.chat(user_input)
101
  references = response.source_nodes
102
+ ref, pages = [], []
103
+
104
+ # Extract file names from the source nodes (if available)
105
  for node in references:
106
  file_name = node.metadata.get('file_name')
107
  if file_name and file_name not in ref:
108
  ref.append(file_name)
109
 
110
  complete_response = str(response) + "\n\n"
111
+ if ref or pages:
112
+ chat_history.append((user_input, complete_response))
113
+ else:
114
+ chat_history.append((user_input, str(response)))
115
  return chat_history, ""
116
 
117
  # -------------------------------------------------------
 
130
  # Use Tabs to separate the file upload and chat interfaces.
131
  with gr.Tab("Upload Documents"):
132
  gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
133
+ # The file upload widget: we specify allowed file types.
134
  file_upload = gr.File(
135
  label="Upload Files",
136
  file_count="multiple",