anasmkh committed
Commit fdd2048 · verified · Parent: 6b3a267

Update app.py

Files changed (1)
  1. app.py +125 -25
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 import qdrant_client
 from getpass import getpass
 
-# Set your OpenAI API key from environmnt variables.
+# Set your OpenAI API key from environment variables.
 openai_api_key = os.getenv('OPENAI_API_KEY')
 
 # -------------------------------------------------------
@@ -33,38 +33,138 @@ client = None
 vector_store = None
 storage_context = None
 
+# Use a persistent folder to keep uploaded files.
+upload_dir = "uploaded_files"
+if not os.path.exists(upload_dir):
+    os.makedirs(upload_dir)
+
+# A set to track which files have already been processed.
+processed_files = set()
+
 # -------------------------------------------------------
-# Function to process uploaded files and build the index.
+# Function to process uploaded files and update the index.
 # -------------------------------------------------------
 def process_upload(files):
     """
-    Accepts a list of uploaded file paths, saves them to a local folder,
-    loads them as documents, and builds the vector index and chat engine.
+    Accepts a list of uploaded file paths, saves them to a persistent folder,
+    loads only new documents, and builds (or updates) the vector index and chat engine.
     """
-    upload_dir = "uploaded_files"
-    if not os.path.exists(upload_dir):
-        os.makedirs(upload_dir)
-    else:
-        # Clear any existing files in the folder.
-        for f in os.listdir(upload_dir):
-            os.remove(os.path.join(upload_dir, f))
-
-    # 'files' is a list of file paths (Gradio's File component with type="file")
+    global client, vector_store, storage_context, index, query_engine, memory, chat_engine, processed_files
+
+    new_file_paths = []
+    # Loop over each uploaded file.
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
-        shutil.copy(file_path, dest)
+        # If the file is not already in our folder, copy it.
+        if file_name not in processed_files:
+            if not os.path.exists(dest):
+                shutil.copy(file_path, dest)
+            new_file_paths.append(dest)
+            processed_files.add(file_name)
+
+    if not new_file_paths:
+        return "No new documents to add."
+
+    # Load only the new documents.
+    new_documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()
+
+    # If this is the first upload, build the index from scratch.
+    if index is None:
+        # (Here we use an in-memory Qdrant client. Change ":memory:" to a persistent path if needed.)
+        client = qdrant_client.QdrantClient(location=":memory:")
+        vector_store = QdrantVectorStore(
+            collection_name="paper",
+            client=client,
+            enable_hybrid=True,
+            batch_size=20,
+        )
+        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+        index = VectorStoreIndex.from_documents(new_documents, storage_context=storage_context)
+    else:
+        # Otherwise, insert the new documents into the existing index.
+        index.insert_documents(new_documents)
+
+    # Reinitialize query and chat engines so they use the updated index.
+    query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
+    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
+    chat_engine = index.as_chat_engine(
+        chat_mode="context",
+        memory=memory,
+        system_prompt=(
+            "You are an AI assistant who answers the user questions, "
+            "use the schema fields to generate appropriate and valid json queries"
+        ),
+    )
+
+    return "Documents uploaded and index updated successfully!"
+
+# -------------------------------------------------------
+# Chat function that uses the built chat engine.
+# -------------------------------------------------------
+def chat_with_ai(user_input, chat_history):
+    global chat_engine
+    # Check if the chat engine is initialized.
+    if chat_engine is None:
+        return chat_history, "Please upload documents first."
 
-    # Load documents from the saved folder.
-    documents = SimpleDirectoryReader(upload_dir).load_data()
+    response = chat_engine.chat(user_input)
+    references = response.source_nodes
+    ref = []
 
-    # Build the index and chat engine using Qdrant as the vector store.
-    global client, vector_store, storage_context, index, query_engine, memory, chat_engine
-    client = qdrant_client.QdrantClient(location=":memory:")
+    # Extract file names from the source nodes (if available)
+    for node in references:
+        file_name = node.metadata.get('file_name')
+        if file_name and file_name not in ref:
+            ref.append(file_name)
 
-    vector_store = QdrantVectorStore(
-        collection_name="paper",
-        client=client,
-        enable_hybrid=True,
-        batch_size=20,
-    )
+    complete_response = str(response) + "\n\n"
+    chat_history.append((user_input, complete_response))
+    return chat_history, ""
+
+# -------------------------------------------------------
+# Function to clear the chat history.
+# -------------------------------------------------------
+def clear_history():
+    return [], ""
+
+# -------------------------------------------------------
+# Build the Gradio interface.
+# -------------------------------------------------------
+def gradio_interface():
+    with gr.Blocks() as demo:
+        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
+
+        with gr.Tab("Upload Documents"):
+            gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
+            # The file upload widget: we specify allowed file types.
+            file_upload = gr.File(
+                label="Upload Files",
+                file_count="multiple",
+                file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
+                type="filepath"  # returns file paths
+            )
+            upload_status = gr.Textbox(label="Upload Status", interactive=False)
+            upload_button = gr.Button("Process Upload")
+
+            upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)
+
+        with gr.Tab("Chat"):
+            chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
+            user_input = gr.Textbox(
+                placeholder="Ask a question...", label="Enter your question"
+            )
+            submit_button = gr.Button("Send")
+            btn_clear = gr.Button("Clear History")
+
+            # A State to hold the chat history.
+            chat_history = gr.State([])
+
+            submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
+            user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
+            btn_clear.click(clear_history, outputs=[chatbot, user_input])
+
+    return demo
+
+# Launch the Gradio app.
+gradio_interface().launch(debug=True)
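
One caveat in the new update path: in the llama_index releases we are aware of, indices expose insert() (one Document at a time) and insert_nodes(), but not an insert_documents() method, so the else: branch above may raise AttributeError on the second upload. Below is a minimal sketch of that branch using the documented per-document call; the helper name insert_new_documents is hypothetical, and the import path assumes llama_index 0.10+ (older versions import from llama_index directly):

    from llama_index.core import Document, VectorStoreIndex

    def insert_new_documents(index: VectorStoreIndex, docs: list[Document]) -> None:
        # Embed and add each new Document to the already-built index;
        # insert() updates the backing vector store in place. There is no
        # insert_documents() batch method in the releases we know of.
        for doc in docs:
            index.insert(doc)

Relatedly, the upload folder is now persistent but the Qdrant collection still lives in ":memory:", so the index starts empty on every restart. qdrant-client's local on-disk mode, e.g. qdrant_client.QdrantClient(path="qdrant_storage") with a path of your choosing, would keep the collection across runs, as the inline comment in the diff already suggests.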