anasmkh committed on
Commit
5bb0370
·
verified ·
1 Parent(s): f321ab3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -11
app.py CHANGED
@@ -1,13 +1,16 @@
1
  import os
2
  import shutil
 
3
  import gradio as gr
4
  import qdrant_client
5
  from getpass import getpass
6
 
7
-
8
  openai_api_key = os.getenv('OPENAI_API_KEY')
9
 
10
-
 
 
11
  from llama_index.llms.openai import OpenAI
12
  from llama_index.embeddings.openai import OpenAIEmbedding
13
  from llama_index.core import Settings
@@ -31,6 +34,9 @@ client = None
31
  vector_store = None
32
  storage_context = None
33
 
 
 
 
34
  # -------------------------------------------------------
35
  # Function to process uploaded files and build the index.
36
  # -------------------------------------------------------
@@ -47,7 +53,7 @@ def process_upload(files):
47
  for f in os.listdir(upload_dir):
48
  os.remove(os.path.join(upload_dir, f))
49
 
50
- # 'files' is a list of file paths (Gradio's File component with type="file")
51
  for file_path in files:
52
  file_name = os.path.basename(file_path)
53
  dest = os.path.join(upload_dir, file_name)
@@ -58,10 +64,30 @@ def process_upload(files):
58
 
59
  # Build the index and chat engine using Qdrant as the vector store.
60
  global client, vector_store, storage_context, index, query_engine, memory, chat_engine
61
- client = qdrant_client.QdrantClient(location=":memory:")
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  vector_store = QdrantVectorStore(
64
- collection_name="paper",
65
  client=client,
66
  enable_hybrid=True,
67
  batch_size=20,
@@ -91,7 +117,6 @@ def process_upload(files):
91
  # -------------------------------------------------------
92
  def chat_with_ai(user_input, chat_history):
93
  global chat_engine
94
- # Check if the chat engine is initialized.
95
  if chat_engine is None:
96
  return chat_history, "Please upload documents first."
97
 
@@ -99,7 +124,6 @@ def chat_with_ai(user_input, chat_history):
99
  references = response.source_nodes
100
  ref, pages = [], []
101
 
102
- # Extract file names from the source nodes (if available)
103
  for node in references:
104
  file_name = node.metadata.get('file_name')
105
  if file_name and file_name not in ref:
@@ -125,15 +149,13 @@ def gradio_interface():
125
  with gr.Blocks() as demo:
126
  gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
127
 
128
- # Use Tabs to separate the file upload and chat interfaces.
129
  with gr.Tab("Upload Documents"):
130
  gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
131
- # The file upload widget: we specify allowed file types.
132
  file_upload = gr.File(
133
  label="Upload Files",
134
  file_count="multiple",
135
  file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
136
- type="filepath" # returns file paths
137
  )
138
  upload_status = gr.Textbox(label="Upload Status", interactive=False)
139
  upload_button = gr.Button("Process Upload")
@@ -148,7 +170,6 @@ def gradio_interface():
148
  submit_button = gr.Button("Send")
149
  btn_clear = gr.Button("Clear History")
150
 
151
- # A State to hold the chat history.
152
  chat_history = gr.State([])
153
 
154
  submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
 
1
  import os
2
  import shutil
3
+ import time
4
  import gradio as gr
5
  import qdrant_client
6
  from getpass import getpass
7
 
8
+ # Set your OpenAI API key from environment variables.
9
  openai_api_key = os.getenv('OPENAI_API_KEY')
10
 
11
+ # -------------------------------------------------------
12
+ # Configure LlamaIndex with OpenAI LLM and Embeddings
13
+ # -------------------------------------------------------
14
  from llama_index.llms.openai import OpenAI
15
  from llama_index.embeddings.openai import OpenAIEmbedding
16
  from llama_index.core import Settings
 
34
  vector_store = None
35
  storage_context = None
36
 
37
+ # Define the collection name.
38
+ collection_name = "paper"
39
+
40
  # -------------------------------------------------------
41
  # Function to process uploaded files and build the index.
42
  # -------------------------------------------------------
 
53
  for f in os.listdir(upload_dir):
54
  os.remove(os.path.join(upload_dir, f))
55
 
56
+ # 'files' is a list of file paths.
57
  for file_path in files:
58
  file_name = os.path.basename(file_path)
59
  dest = os.path.join(upload_dir, file_name)
 
64
 
65
  # Build the index and chat engine using Qdrant as the vector store.
66
  global client, vector_store, storage_context, index, query_engine, memory, chat_engine
 
67
 
68
+ # Use a persistent Qdrant client.
69
+ client = qdrant_client.QdrantClient(
70
+ path="./qdrant_db",
71
+ prefer_grpc=True
72
+ )
73
+
74
+ # Ensure the collection exists.
75
+ from qdrant_client.http import models
76
+ existing_collections = {col.name for col in client.get_collections().collections}
77
+ if collection_name not in existing_collections:
78
+ client.create_collection(
79
+ collection_name=collection_name,
80
+ vectors_config=models.VectorParams(
81
+ size=1536, # text-embedding-ada-002 produces 1536-d vectors.
82
+ distance=models.Distance.COSINE
83
+ )
84
+ )
85
+ # Wait a moment for Qdrant to register the new collection.
86
+ time.sleep(1)
87
+
88
+ # Initialize the vector store.
89
  vector_store = QdrantVectorStore(
90
+ collection_name=collection_name,
91
  client=client,
92
  enable_hybrid=True,
93
  batch_size=20,
 
117
  # -------------------------------------------------------
118
  def chat_with_ai(user_input, chat_history):
119
  global chat_engine
 
120
  if chat_engine is None:
121
  return chat_history, "Please upload documents first."
122
 
 
124
  references = response.source_nodes
125
  ref, pages = [], []
126
 
 
127
  for node in references:
128
  file_name = node.metadata.get('file_name')
129
  if file_name and file_name not in ref:
 
149
  with gr.Blocks() as demo:
150
  gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
151
 
 
152
  with gr.Tab("Upload Documents"):
153
  gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
 
154
  file_upload = gr.File(
155
  label="Upload Files",
156
  file_count="multiple",
157
  file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
158
+ type="filepath"
159
  )
160
  upload_status = gr.Textbox(label="Upload Status", interactive=False)
161
  upload_button = gr.Button("Process Upload")
 
170
  submit_button = gr.Button("Send")
171
  btn_clear = gr.Button("Clear History")
172
 
 
173
  chat_history = gr.State([])
174
 
175
  submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])