Update app.py
app.py
CHANGED
@@ -78,7 +78,7 @@ def initialize_apis(api, model, pinecone_api_key, groq_api_key, azure_api_key):
        llm = initialize_llm(api, model, groq_api_key, azure_api_key)
        if pinecone_index is None:
            pinecone_client = Pinecone(pinecone_api_key)
-           pinecone_index = pinecone_client.Index("
+           pinecone_index = pinecone_client.Index("ll144")
        logging.info("Initialized LLM and Pinecone.")
    except Exception as e:
        log_and_exit(f"Error initializing APIs: {e}")
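The index name is now hard-coded to "ll144" (the old value is truncated in the capture above). A minimal sketch of the initialization this hunk implies, assuming the v3-style pinecone client; the helper name is hypothetical:

from pinecone import Pinecone

def init_pinecone(pinecone_api_key: str, index_name: str = "ll144"):
    # Mirrors the hunk: build the client, then open a handle to the existing index.
    client = Pinecone(api_key=pinecone_api_key)
    return client.Index(index_name)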
@@ -103,10 +103,9 @@ def initialize_llm(api, model, groq_api_key, azure_api_key):
    )

 def load_pdf_data(chunk_size):
-    PDF_FILE_PATH = "policy.pdf"
     reader = PyMuPDFReader()
     file_extractor = {".pdf": reader}
-    documents = SimpleDirectoryReader(input_files=[
+    documents = SimpleDirectoryReader(input_files=['LL144.pdf', 'LL144_Definitions.pdf'], file_extractor=file_extractor).load_data()
     return documents

 def create_index(documents, embedding_model_type="HF", embedding_model="BAAI/bge-large-en-v1.5", retriever_method="BM25", chunk_size=512):
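load_pdf_data now reads two fixed files instead of the single policy.pdf. A sketch of the loader, assuming the llama-index 0.10+ package layout (import paths differ in older releases) and that the two LL144 PDFs sit in the working directory:

from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import PyMuPDFReader

def load_pdf_data(chunk_size):
    # chunk_size is accepted here; the actual splitting happens when the index is built.
    reader = PyMuPDFReader()
    file_extractor = {".pdf": reader}  # route every .pdf through PyMuPDF
    documents = SimpleDirectoryReader(
        input_files=["LL144.pdf", "LL144_Definitions.pdf"],
        file_extractor=file_extractor,
    ).load_data()
    return documents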
@@ -227,7 +226,7 @@ def log_to_google_sheets(data):
    except Exception as e:
        logging.error(f"Error logging data to Google Sheets: {e}")

-def update_google_sheets(question_id, feedback=None, detailed_feedback=None):
+def update_google_sheets(question_id, feedback=None, detailed_feedback=None, annotated_answer=None):
    try:
        existing_data = sheet.get_all_values()
        headers = existing_data[0]
@@ -237,6 +236,8 @@ def update_google_sheets(question_id, feedback=None, detailed_feedback=None):
                sheet.update_cell(i+1, headers.index("Feedback") + 1, feedback)
            if detailed_feedback is not None:
                sheet.update_cell(i+1, headers.index("Detailed Feedback") + 1, detailed_feedback)
+           if annotated_answer is not None:
+               sheet.update_cell(i+1, headers.index("annotated_answer") + 1, annotated_answer)
            logging.info("Updated data in Google Sheets.")
            return
    except Exception as e:
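update_google_sheets gains an annotated_answer path next to the existing feedback columns. The lookup-and-update pattern it follows, sketched with gspread's 1-indexed update_cell; the "Question ID" header name is an assumption about the sheet schema:

def update_google_sheets(sheet, question_id, feedback=None,
                         detailed_feedback=None, annotated_answer=None):
    rows = sheet.get_all_values()              # gspread: entire sheet as lists of strings
    headers = rows[0]
    for i, row in enumerate(rows[1:], start=1):
        if row[headers.index("Question ID")] != question_id:
            continue
        # Sheet rows are 1-indexed and row 1 holds the headers, hence i + 1.
        if feedback is not None:
            sheet.update_cell(i + 1, headers.index("Feedback") + 1, feedback)
        if detailed_feedback is not None:
            sheet.update_cell(i + 1, headers.index("Detailed Feedback") + 1, detailed_feedback)
        if annotated_answer is not None:
            sheet.update_cell(i + 1, headers.index("annotated_answer") + 1, annotated_answer)
        return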
@@ -252,7 +253,6 @@ def run_streamlit_app():

    with col1:
        pinecone_api_key = st.text_input("Pinecone API Key")
-       parse_api_key = st.text_input("Parse API Key")
        azure_api_key = st.text_input("Azure API Key")
        groq_api_key = st.text_input("Groq API Key")

@@ -264,8 +264,8 @@ def run_streamlit_app():
            st.session_state['selected_api'] = 'groq'

    with col2:
-       selected_model = st.selectbox("Select Model", ["llama3-8b", "llama3-70b", "mixtral-8x7b", "gemma-7b", "gpt35"], key='selected_model', on_change=update_api_based_on_model)
-       selected_api = st.selectbox("Select API", ["azure", "groq"], key='selected_api', disabled=True)
+       selected_model = st.selectbox("Select Model", ["llama3-8b", "llama3-70b", "mixtral-8x7b", "gemma-7b", "gpt35"], index=4, key='selected_model', on_change=update_api_based_on_model)
+       selected_api = st.selectbox("Select API", ["azure", "groq"], index=0, key='selected_api', disabled=True)
        embedding_model_type = "HF"
        embedding_model = st.selectbox("Select Embedding Model", ["BAAI/bge-large-en-v1.5", "other_model"])
        retriever_method = st.selectbox("Select Retriever Method", ["Vector Search", "BM25", "BM25+Vector"])
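The two selectboxes now default to "gpt35" (index=4) and "azure" (index=0), with the API box disabled and driven by the model choice. A plausible sketch of the on_change callback; the mapping below is inferred from the surrounding session-state writes, not copied from this commit:

import streamlit as st

def update_api_based_on_model():
    # Keep the disabled "Select API" box in sync with the chosen model.
    if st.session_state['selected_model'] == 'gpt35':
        st.session_state['selected_api'] = 'azure'
    else:
        st.session_state['selected_api'] = 'groq'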
@@ -298,25 +298,35 @@ def run_streamlit_app():
            )
            st.markdown("### Answer")
            st.markdown(chat['response'])
-
+
+           col1, col2 = st.columns([1, 1])
            with col1:
-               if st.button("
-
-
-
-
+               if st.button("Annotate π", key=f"annotate_{chat_index}"):
+                   chat['annotate'] = True
+                   chat['feedback'] = -1
+                   st.session_state.chat_history[chat_index] = chat
+                   update_google_sheets(chat['id'], feedback=-1)
+                   st.rerun()
            with col2:
-               if st.button("
-
-
-                   st.session_state.chat_history[chat_index] = chat
-                   update_google_sheets(chat['id'], feedback=-1)
-           with col3:
-               feedback = st.text_area("How was the response? Does it match the context? Does it answer the question fully?", key=f"textarea_{chat_index}")
-               if st.button("Submit Feedback", key=f"submit_{chat_index}"):
-                   chat['detailed_feedback'] = feedback
+               if st.button("Approve π", key=f"approve_{chat_index}"):
+                   chat['approved'] = True
+                   chat['feedback'] = 1
                    st.session_state.chat_history[chat_index] = chat
-                   update_google_sheets(chat['id'],
+                   update_google_sheets(chat['id'], feedback=1, annotated_answer=chat['response'])
+
+           if chat.get('annotate', False):
+               annotated_answer = st.text_area("Annotate Answer", value=chat['response'], key=f"annotate_text_{chat_index}")
+               if st.button("Submit Annotated Answer", key=f"submit_annotate_{chat_index}"):
+                   chat['annotated_answer'] = annotated_answer
+                   chat['annotate'] = False
+                   st.session_state.chat_history[chat_index] = chat
+                   update_google_sheets(chat['id'], annotated_answer=annotated_answer)
+
+           feedback = st.text_area("How was the response? Does it match the context? Does it answer the question fully?", key=f"textarea_{chat_index}")
+           if st.button("Submit Feedback", key=f"submit_{chat_index}"):
+               chat['detailed_feedback'] = feedback
+               st.session_state.chat_history[chat_index] = chat
+               update_google_sheets(chat['id'], detailed_feedback=feedback)

    if question := st.chat_input("Enter your question"):
        if st.session_state.query_engine:
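This hunk swaps the old per-column feedback buttons for an Annotate/Approve pair plus a conditional editor. The round trip relies on a flag stored in the chat dict and st.rerun(): the button flips the flag, the script reruns, and the flag reveals the text area on the next pass. A stripped-down, standalone sketch of that pattern (stand-in state only, no Sheets calls):

import streamlit as st

if 'chat' not in st.session_state:
    st.session_state['chat'] = {'response': 'draft answer', 'annotate': False}
chat = st.session_state['chat']

if st.button("Annotate"):
    chat['annotate'] = True       # survives the rerun because chat lives in session_state
    st.rerun()

if chat['annotate']:
    edited = st.text_area("Annotate Answer", value=chat['response'])
    if st.button("Submit Annotated Answer"):
        chat['response'] = edited
        chat['annotate'] = False
        st.rerun()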
@@ -329,10 +339,10 @@ def run_streamlit_app():
            logging.info(f"Retrieved contexts: {[node.text for node in response.source_nodes]}")
            question_id = str(uuid.uuid4())
            timestamp = datetime.now().isoformat()
-           st.session_state.chat_history.append({'id': question_id, 'user': question, 'response': response.response, 'contexts': response.source_nodes, 'feedback': 0, 'detailed_feedback': '', 'timestamp': timestamp})
+           st.session_state.chat_history.append({'id': question_id, 'user': question, 'response': response.response, 'contexts': response.source_nodes, 'feedback': 0, 'detailed_feedback': '', 'annotated_answer': '', 'timestamp': timestamp})

            # Log initial query and response to Google Sheets without feedback
-           log_to_google_sheets([question_id, question, response.response, st.session_state['selected_api'], selected_model, embedding_model, retriever_method, chunk_size, top_k, 0, "", timestamp])
+           log_to_google_sheets([question_id, question, response.response, st.session_state['selected_api'], selected_model, embedding_model, retriever_method, chunk_size, top_k, 0, "", "", timestamp])

            st.rerun()
        else:
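Both the chat-history entry and the Sheets row gain an empty annotated_answer slot so later updates land in an existing column. A sketch of the initial append, assuming a gspread worksheet and the column order used in the call above (the helper name and header layout are assumptions):

def log_initial_row(sheet, question_id, question, answer, api, model, embedding_model,
                    retriever_method, chunk_size, top_k, timestamp):
    # Feedback (0), Detailed Feedback ("") and annotated_answer ("") start empty
    # and are filled in later by update_google_sheets.
    sheet.append_row([question_id, question, answer, api, model, embedding_model,
                      retriever_method, chunk_size, top_k, 0, "", "", timestamp])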