anasmkh committed
Commit a406aa7 · verified · 1 Parent(s): fda2b61

Create app.py

Files changed (1)
  1. app.py +174 -0
app.py ADDED
@@ -0,0 +1,174 @@
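# A minimal sketch of this app's runtime requirements (these are the usual
# pip distribution names for the imports below; exact versions are an
# assumption, not pinned by this commit):
#   pip install streamlit qdrant-client llama-index llama-index-vector-stores-qdrant
# SimpleDirectoryReader needs extra readers for some of the accepted upload
# types (e.g. pypdf for .pdf, docx2txt for .docx). Launch the app with:
#   streamlit run app.py
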
import os
import shutil
import streamlit as st
from io import BytesIO

# Importing LlamaIndex components
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.memory import ChatMemoryBuffer
import qdrant_client

# =============================================================================
# Configuration and Global Initialization
# =============================================================================

# Ensure that the OpenAI API key is available
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("Please set your OPENAI_API_KEY environment variable.")

# System prompt for the chat engine
SYSTEM_PROMPT = (
    "You are an AI assistant who answers the user's questions. "
    "Use the schema fields to generate appropriate and valid JSON queries."
)

# Configure the LLM and embedding models
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
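
# Hedged note: text-embedding-ada-002 returns 1536-dimensional vectors; the
# QdrantVectorStore below creates its collection lazily on first insert, so
# the vector size does not need to be configured by hand here.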

# Load initial documents from a directory called "new_file"
if os.path.exists("new_file"):
    documents = SimpleDirectoryReader("new_file").load_data()
else:
    documents = []

# Set up the Qdrant vector store (using an in-memory collection for simplicity)
client = qdrant_client.QdrantClient(location=":memory:")
vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,
    batch_size=20,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
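
# Hedged notes on the store above:
# - enable_hybrid=True combines dense and sparse retrieval; in llama-index
#   this pulls in a sparse encoder via the `fastembed` package at runtime,
#   so that extra dependency must be installed for hybrid mode to work.
# - An in-memory Qdrant instance loses all vectors on restart. A minimal
#   persistent alternative (the "qdrant_data" path is an assumption):
#       client = qdrant_client.QdrantClient(path="qdrant_data")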

# Build the initial index and chat engine
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=chat_memory,
    system_prompt=SYSTEM_PROMPT,
)
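
# Hedged note: chat_mode="context" builds a ContextChatEngine, which runs a
# retrieval step on every user message and injects the retrieved nodes into
# the prompt context before calling the LLM.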

# =============================================================================
# Helper Functions
# =============================================================================

def process_uploaded_file(uploaded_file: BytesIO) -> str:
    """
    Process the uploaded file:
    1. Save the file to an "uploads" folder.
    2. Copy it to a temporary folder ("temp_upload") for reading.
    3. Update the global documents list and rebuild the index and chat engine.
    """
    global documents, index, chat_engine

    if uploaded_file is None:
        return "No file uploaded."

    # Ensure the uploads directory exists
    uploads_dir = "uploads"
    os.makedirs(uploads_dir, exist_ok=True)

    # Save the uploaded file locally
    file_name = uploaded_file.name
    dest_path = os.path.join(uploads_dir, file_name)
    with open(dest_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Prepare a temporary directory for processing the file
    temp_dir = "temp_upload"
    os.makedirs(temp_dir, exist_ok=True)
    # Clear any existing files in the temp_upload directory
    for f_name in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, f_name))
    shutil.copy(dest_path, temp_dir)

    # Load new document(s) from the temporary folder using SimpleDirectoryReader
    new_docs = SimpleDirectoryReader(temp_dir).load_data()

    # Update the global documents list and rebuild the index and chat engine
    documents.extend(new_docs)
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=chat_memory,
        system_prompt=SYSTEM_PROMPT,
    )

    return f"File '{file_name}' processed and added to the index."
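
# Hedged note: rebuilding the index from the full `documents` list re-embeds
# every previously indexed document and re-inserts it into the same Qdrant
# collection, so vectors accumulate duplicates as uploads repeat. A minimal
# incremental sketch (an illustrative addition, assuming the chat engine's
# retriever sees new inserts because it queries the same underlying index):
def add_docs_incrementally(new_docs) -> None:
    """Insert only newly loaded documents into the existing index."""
    for doc in new_docs:
        index.insert(doc)  # embeds and stores a single document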

def chat_with_ai(user_input: str) -> str:
    """
    Send user input to the chat engine and return the response.
    """
    response = chat_engine.chat(user_input)
    # Extract references from the response (if any)
    references = response.source_nodes
    ref = []
    for node in references:
        if "file_name" in node.metadata and node.metadata["file_name"] not in ref:
            ref.append(node.metadata["file_name"])
    complete_response = str(response)
    if ref:
        complete_response += "\n\nReferences: " + ", ".join(ref)
    return complete_response
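
# Hedged variant: llama-index chat engines also expose stream_chat(), whose
# response_gen yields tokens as they arrive; the generator could be fed to
# Streamlit's st.write_stream. This function is an illustrative addition,
# not part of the original app.
def chat_with_ai_streaming(user_input: str):
    """Yield response tokens incrementally from the chat engine."""
    streaming_response = chat_engine.stream_chat(user_input)
    for token in streaming_response.response_gen:
        yield token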

# =============================================================================
# Streamlit App Layout
# =============================================================================

st.set_page_config(page_title="LlamaIndex Chat & File Upload", layout="wide")
st.title("Chat Interface for LlamaIndex with File Upload")

# Use Streamlit tabs for separate Chat and Upload functionalities
tab1, tab2 = st.tabs(["Chat", "Upload"])

# -----------------------------------------------------------------------------
# Chat Tab
# -----------------------------------------------------------------------------
with tab1:
    st.header("Chat with the AI")
    # Initialize chat history in session state if it does not exist
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []

    # Display conversation history
    for chat in st.session_state["chat_history"]:
        st.markdown(f"**User:** {chat[0]}")
        st.markdown(f"**AI:** {chat[1]}")
        st.markdown("---")

    # Input text for user query
    user_input = st.text_input("Enter your question:")

    # When the "Send" button is clicked, process the chat
    if st.button("Send") and user_input:
        with st.spinner("Processing..."):
            response = chat_with_ai(user_input)
        st.session_state["chat_history"].append((user_input, response))
        # st.rerun() replaced the deprecated st.experimental_rerun() in
        # recent Streamlit releases; refresh the page to show updated history
        st.rerun()

    # Button to clear the conversation history
    if st.button("Clear History"):
        st.session_state["chat_history"] = []
        st.rerun()

# -----------------------------------------------------------------------------
# Upload Tab
# -----------------------------------------------------------------------------
with tab2:
    st.header("Upload a File")
    uploaded_file = st.file_uploader(
        "Choose a file to upload",
        type=["txt", "pdf", "doc", "docx", "csv", "xlsx"],
    )
    if st.button("Upload and Process"):
        if uploaded_file is not None:
            with st.spinner("Uploading and processing file..."):
                status = process_uploaded_file(uploaded_file)
            st.success(status)
        else:
            st.error("No file uploaded.")