anasmkh committed on
Commit aff47dd · verified · 1 Parent(s): c9eadbe

Update app.py

Files changed (1):
  1. app.py +50 -62
app.py CHANGED
@@ -24,7 +24,7 @@ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageCon
 from llama_index.vector_stores.qdrant import QdrantVectorStore
 from llama_index.core.memory import ChatMemoryBuffer
 
-# Global variables to hold persistent objects.
+# Global variables to hold the index and chat engine.
 chat_engine = None
 index = None
 query_engine = None
@@ -33,122 +33,109 @@ client = None
 vector_store = None
 storage_context = None
 
-# Define a global collection name (you can change this as needed)
-collection_name = "paper"
-
+# -------------------------------------------------------
+# Function to process uploaded files and build the index.
+# -------------------------------------------------------
 def process_upload(files):
     """
-    Process newly uploaded files by copying them into a persistent folder,
-    loading their content, and then either building a new index or inserting
-    new documents into the existing index.
+    Accepts a list of uploaded file paths, saves them to a local folder,
+    loads them as documents, and builds the vector index and chat engine.
     """
     upload_dir = "uploaded_files"
-    # Create the upload folder if it does not exist.
     if not os.path.exists(upload_dir):
         os.makedirs(upload_dir)
+    else:
+        # Clear any existing files in the folder.
+        for f in os.listdir(upload_dir):
+            os.remove(os.path.join(upload_dir, f))
 
-    # Copy new files into the upload directory.
-    new_file_paths = []
+    # 'files' is a list of file paths (Gradio's File component with type="filepath")
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
-        # Copy the file if it doesn't already exist.
-        if not os.path.exists(dest):
-            shutil.copy(file_path, dest)
-            new_file_paths.append(dest)
+        shutil.copy(file_path, dest)
 
-    # Load only the newly uploaded documents.
-    # (SimpleDirectoryReader can accept a list of file paths via the 'input_files' parameter.)
-    documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()
+    # Load documents from the saved folder.
+    documents = SimpleDirectoryReader(upload_dir).load_data()
 
+    # Build the index and chat engine using Qdrant as the vector store.
     global client, vector_store, storage_context, index, query_engine, memory, chat_engine
-
-    # Initialize Qdrant client if not already done.
-    if client is None:
-        client = qdrant_client.QdrantClient(
-            path="./qdrant_db",
-            prefer_grpc=True
-        )
+    client = qdrant_client.QdrantClient(location=":memory:")
 
-    # Ensure the collection exists.
-    from qdrant_client.http import models
-    existing_collections = {col.name for col in client.get_collections().collections}
-    if collection_name not in existing_collections:
-        client.create_collection(
-            collection_name=collection_name,
-            vectors_config=models.VectorParams(
-                size=1536,  # OpenAI's text-embedding-ada-002 produces 1536-d vectors.
-                distance=models.Distance.COSINE
-            )
-        )
-
-    # Initialize the vector store if not already done.
-    if vector_store is None:
-        vector_store = QdrantVectorStore(
-            collection_name=collection_name,
-            client=client,
-            enable_hybrid=True,
-            batch_size=20,
-        )
+    vector_store = QdrantVectorStore(
+        collection_name="paper",
+        client=client,
+        enable_hybrid=True,
+        batch_size=20,
+    )
 
-    # Initialize storage context if not already done.
-    if storage_context is None:
-        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
 
-    # If no index exists yet, create one from the documents.
-    if index is None:
-        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
-    else:
-        # Append the new documents to the existing index.
-        index.insert_documents(documents)
+    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 
-    # (Optional) Reinitialize the query and chat engines so they reflect the updated index.
     query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
+
     memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
+
     chat_engine = index.as_chat_engine(
         chat_mode="context",
         memory=memory,
-        system_prompt="You are an AI assistant who answers the user questions,"
+        system_prompt=(
+            "You are an AI assistant who answers the user questions, "
+            "use the schema fields to generate appropriate and valid json queries"
+        ),
     )
 
-    return "Documents uploaded and index updated successfully!"
+    return "Documents uploaded and index built successfully!"
 
+# -------------------------------------------------------
+# Chat function that uses the built chat engine.
+# -------------------------------------------------------
 def chat_with_ai(user_input, chat_history):
     global chat_engine
+    # Check if the chat engine is initialized.
    if chat_engine is None:
         return chat_history, "Please upload documents first."
 
     response = chat_engine.chat(user_input)
     references = response.source_nodes
-    ref = []
+    ref, pages = [], []
 
-    # Extract referenced file names from the response.
+    # Extract file names from the source nodes (if available)
     for node in references:
         file_name = node.metadata.get('file_name')
         if file_name and file_name not in ref:
             ref.append(file_name)
 
     complete_response = str(response) + "\n\n"
-    if ref:
+    if ref or pages:
         chat_history.append((user_input, complete_response))
     else:
         chat_history.append((user_input, str(response)))
     return chat_history, ""
 
+# -------------------------------------------------------
+# Function to clear the chat history.
+# -------------------------------------------------------
 def clear_history():
     return [], ""
 
+# -------------------------------------------------------
+# Build the Gradio interface.
+# -------------------------------------------------------
 def gradio_interface():
     with gr.Blocks() as demo:
-        gr.Markdown("# AI Assistant")
+        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
 
+        # Use Tabs to separate the file upload and chat interfaces.
         with gr.Tab("Upload Documents"):
             gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
+            # The file upload widget: we specify allowed file types.
             file_upload = gr.File(
                 label="Upload Files",
                 file_count="multiple",
                 file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
-                type="filepath"  # Returns file paths.
+                type="filepath"  # returns file paths
             )
             upload_status = gr.Textbox(label="Upload Status", interactive=False)
             upload_button = gr.Button("Process Upload")
@@ -156,7 +143,7 @@ def gradio_interface():
         upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)
 
         with gr.Tab("Chat"):
-            chatbot = gr.Chatbot(label="AI Assistant Chat Interface")
+            chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
             user_input = gr.Textbox(
                 placeholder="Ask a question...", label="Enter your question"
             )
@@ -172,4 +159,5 @@ def gradio_interface():
 
     return demo
 
+# Launch the Gradio app.
 gradio_interface().launch(debug=True)
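For orientation, the post-change process_upload() reduces to the standalone sketch below. Assumptions not in the diff: llama-index, llama-index-vector-stores-qdrant, and qdrant-client are installed, an OpenAI API key is set (LlamaIndex's default embedding model), and the "uploaded_files" folder exists with documents in it; enable_hybrid=True is omitted here because hybrid mode pulls in an extra sparse-embedding dependency.

# Minimal sketch of the new one-shot indexing flow (see assumptions above).
import qdrant_client
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore

client = qdrant_client.QdrantClient(location=":memory:")  # in-memory, lost on restart
vector_store = QdrantVectorStore(collection_name="paper", client=client)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

documents = SimpleDirectoryReader("uploaded_files").load_data()  # folder app.py writes to
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

chat_engine = index.as_chat_engine(chat_mode="context")
print(chat_engine.chat("What are these documents about?"))

Because the client is created with location=":memory:" and the upload folder is wiped on entry, each call to process_upload() fully replaces the previous corpus rather than appending to it.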
 
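The removed code instead kept Qdrant on disk and guarded every object behind `if ... is None` checks so repeated uploads could append to one index; its incremental branch called index.insert_documents(documents), a method VectorStoreIndex does not appear to expose (the per-document APIs are insert() and insert_nodes()), which would likely have raised an AttributeError on a second upload. If persistence is wanted again, here is a sketch of the removed setup, reusing its names and dropping prefer_grpc, which only matters for server connections rather than local path= mode.

# Sketch: recreating the removed persistent-client setup (names from the old code).
import qdrant_client
from qdrant_client.http import models

client = qdrant_client.QdrantClient(path="./qdrant_db")  # local on-disk storage
existing = {col.name for col in client.get_collections().collections}
if "paper" not in existing:
    client.create_collection(
        collection_name="paper",
        vectors_config=models.VectorParams(
            size=1536,  # text-embedding-ada-002 dimensionality, per the removed comment
            distance=models.Distance.COSINE,
        ),
    )

With a persistent client like this, new documents could then be added one at a time via index.insert(doc) instead of rebuilding the whole index on every upload.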