anasmkh committed on
Commit
c9eadbe
·
verified ·
1 Parent(s): 4e3419a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -38
app.py CHANGED
@@ -4,10 +4,12 @@ import gradio as gr
4
  import qdrant_client
5
  from getpass import getpass
6
 
7
-
8
  openai_api_key = os.getenv('OPENAI_API_KEY')
9
 
10
-
 
 
11
  from llama_index.llms.openai import OpenAI
12
  from llama_index.embeddings.openai import OpenAIEmbedding
13
  from llama_index.core import Settings
@@ -15,12 +17,14 @@ from llama_index.core import Settings
15
  Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
16
  Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
17
 
18
-
 
 
19
  from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
20
  from llama_index.vector_stores.qdrant import QdrantVectorStore
21
  from llama_index.core.memory import ChatMemoryBuffer
22
 
23
-
24
  chat_engine = None
25
  index = None
26
  query_engine = None
@@ -29,61 +33,85 @@ client = None
29
  vector_store = None
30
  storage_context = None
31
 
 
 
32
 
33
  def process_upload(files):
 
 
 
 
 
34
  upload_dir = "uploaded_files"
 
35
  if not os.path.exists(upload_dir):
36
  os.makedirs(upload_dir)
37
- else:
38
- for f in os.listdir(upload_dir):
39
- os.remove(os.path.join(upload_dir, f))
40
 
 
 
41
  for file_path in files:
42
  file_name = os.path.basename(file_path)
43
  dest = os.path.join(upload_dir, file_name)
44
- shutil.copy(file_path, dest)
 
 
 
45
 
46
- documents = SimpleDirectoryReader(upload_dir).load_data()
 
 
47
 
48
  global client, vector_store, storage_context, index, query_engine, memory, chat_engine
49
- client = qdrant_client.QdrantClient(
50
- path="./qdrant_db",
51
- prefer_grpc=True
52
- )
 
 
 
 
 
 
53
  existing_collections = {col.name for col in client.get_collections().collections}
54
  if collection_name not in existing_collections:
55
  client.create_collection(
56
  collection_name=collection_name,
57
  vectors_config=models.VectorParams(
58
- size=1536,
59
  distance=models.Distance.COSINE
60
  )
61
  )
62
- vector_store = QdrantVectorStore(
63
- collection_name=collection_name,
64
- client=client,
65
- enable_hybrid=True,
66
- batch_size=20,
67
- )
68
 
69
- storage_context = StorageContext.from_defaults(vector_store=vector_store)
 
 
 
 
 
 
 
70
 
71
- index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 
 
72
 
73
- query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
 
 
 
 
 
74
 
 
 
75
  memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
76
-
77
  chat_engine = index.as_chat_engine(
78
  chat_mode="context",
79
  memory=memory,
80
- system_prompt=(
81
- "You are an AI assistant who answers the user questions,"
82
- ),
83
  )
84
 
85
- return "Documents uploaded and index built successfully!"
86
-
87
 
88
  def chat_with_ai(user_input, chat_history):
89
  global chat_engine
@@ -92,25 +120,24 @@ def chat_with_ai(user_input, chat_history):
92
 
93
  response = chat_engine.chat(user_input)
94
  references = response.source_nodes
95
- ref, pages = [], []
96
 
 
97
  for node in references:
98
  file_name = node.metadata.get('file_name')
99
  if file_name and file_name not in ref:
100
  ref.append(file_name)
101
 
102
  complete_response = str(response) + "\n\n"
103
- if ref or pages:
104
  chat_history.append((user_input, complete_response))
105
  else:
106
  chat_history.append((user_input, str(response)))
107
  return chat_history, ""
108
 
109
-
110
  def clear_history():
111
  return [], ""
112
 
113
-
114
  def gradio_interface():
115
  with gr.Blocks() as demo:
116
  gr.Markdown("# AI Assistant")
@@ -118,10 +145,11 @@ def gradio_interface():
118
  with gr.Tab("Upload Documents"):
119
  gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
120
  file_upload = gr.File(
121
- label="Upload Files",
122
- file_count="multiple",
123
- file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
124
- type="filepath" )
 
125
  upload_status = gr.Textbox(label="Upload Status", interactive=False)
126
  upload_button = gr.Button("Process Upload")
127
 
@@ -144,5 +172,4 @@ def gradio_interface():
144
 
145
  return demo
146
 
147
-
148
  gradio_interface().launch(debug=True)
 
import qdrant_client
from getpass import getpass

# Read the OpenAI API key from the environment (file lines 1-3, not visible
# here, import gradio/os/shutil per the diff context header).
openai_api_key = os.getenv('OPENAI_API_KEY')

# -------------------------------------------------------
# Configure LlamaIndex with the OpenAI LLM and embeddings
# -------------------------------------------------------
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# -------------------------------------------------------
# Document readers, index, vector store, chat memory
# -------------------------------------------------------
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.memory import ChatMemoryBuffer

# Module-level singletons shared across the Gradio callbacks; they are
# populated lazily by process_upload().
chat_engine = None
index = None
query_engine = None
# NOTE(review): `memory = None` is assumed here — process_upload() declares it
# global, but the line itself falls between diff hunks; confirm against repo.
memory = None
client = None
vector_store = None
storage_context = None

# Name of the Qdrant collection that stores the document vectors.
collection_name = "paper"
def process_upload(files):
    """
    Copy newly uploaded files into a persistent folder, load their content,
    and either build a new index or insert the new documents into the
    existing one.

    Args:
        files: list of local file paths handed over by the Gradio File
            component (``type="filepath"``).

    Returns:
        A status string shown in the "Upload Status" textbox.
    """
    upload_dir = "uploaded_files"
    os.makedirs(upload_dir, exist_ok=True)

    # Copy only files we have not seen before, so re-uploads are not
    # re-embedded into the vector store.
    new_file_paths = []
    for file_path in files:
        dest = os.path.join(upload_dir, os.path.basename(file_path))
        if not os.path.exists(dest):
            shutil.copy(file_path, dest)
            new_file_paths.append(dest)

    # BUG FIX: if every file already existed, new_file_paths is empty and
    # SimpleDirectoryReader(input_files=[]) raises — bail out early instead.
    if not new_file_paths:
        return "No new documents to process."

    # Load only the newly uploaded documents.
    documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()

    global client, vector_store, storage_context, index, query_engine, memory, chat_engine

    # Lazily create the embedded (on-disk) Qdrant client.
    if client is None:
        client = qdrant_client.QdrantClient(
            path="./qdrant_db",
            prefer_grpc=True,
        )

    # Ensure the target collection exists before wiring the vector store.
    from qdrant_client.http import models
    existing_collections = {col.name for col in client.get_collections().collections}
    if collection_name not in existing_collections:
        client.create_collection(
            collection_name=collection_name,
            vectors_config=models.VectorParams(
                size=1536,  # text-embedding-ada-002 produces 1536-d vectors
                distance=models.Distance.COSINE,
            ),
        )

    if vector_store is None:
        vector_store = QdrantVectorStore(
            collection_name=collection_name,
            client=client,
            enable_hybrid=True,
            batch_size=20,
        )

    if storage_context is None:
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

    if index is None:
        # First upload: build the index from scratch.
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    else:
        # BUG FIX: VectorStoreIndex has no insert_documents() method; the
        # public API inserts one Document at a time via insert().
        for doc in documents:
            index.insert(doc)

    # Rebuild the query/chat engines so they reflect the updated index.
    query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        system_prompt="You are an AI assistant who answers the user questions,",
    )

    return "Documents uploaded and index updated successfully!"
116
def chat_with_ai(user_input, chat_history):
    """
    Send user_input to the chat engine and append the exchange to the history.

    Args:
        user_input: the user's message from the textbox.
        chat_history: list of (user, assistant) tuples maintained by Gradio.

    Returns:
        (updated_history, "") — the empty string clears the input box.
    """
    global chat_engine
    # NOTE(review): two original lines here (file lines 118-119) are elided in
    # the diff view — presumably this same None-guard; confirm against repo.
    if chat_engine is None:
        chat_history.append((user_input, "Please upload documents first."))
        return chat_history, ""

    response = chat_engine.chat(user_input)
    references = response.source_nodes
    ref = []

    # Collect the distinct source file names the answer drew from.
    for node in references:
        file_name = node.metadata.get('file_name')
        if file_name and file_name not in ref:
            ref.append(file_name)

    complete_response = str(response) + "\n\n"
    if ref:
        chat_history.append((user_input, complete_response))
    else:
        chat_history.append((user_input, str(response)))
    return chat_history, ""
 
 
138
def clear_history():
    """Reset the chat UI: an empty history and a cleared input box."""
    fresh_history = []
    cleared_input = ""
    return fresh_history, cleared_input
140
 
 
141
  def gradio_interface():
142
  with gr.Blocks() as demo:
143
  gr.Markdown("# AI Assistant")
 
145
  with gr.Tab("Upload Documents"):
146
  gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
147
  file_upload = gr.File(
148
+ label="Upload Files",
149
+ file_count="multiple",
150
+ file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
151
+ type="filepath" # Returns file paths.
152
+ )
153
  upload_status = gr.Textbox(label="Upload Status", interactive=False)
154
  upload_button = gr.Button("Process Upload")
155
 
 
172
 
173
  return demo
174
 
 
175
  gradio_interface().launch(debug=True)