Update app.py
app.py
CHANGED
@@ -78,7 +78,7 @@ def initialize_apis(api, model, pinecone_api_key, groq_api_key, azure_api_key):
        llm = initialize_llm(api, model, groq_api_key, azure_api_key)
        if pinecone_index is None:
            pinecone_client = Pinecone(pinecone_api_key)
-           pinecone_index = pinecone_client.Index("
+           pinecone_index = pinecone_client.Index("ll144")
        logging.info("Initialized LLM and Pinecone.")
    except Exception as e:
        log_and_exit(f"Error initializing APIs: {e}")
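The index name is now hard-coded to "ll144" (the old value is truncated in the capture above). A minimal sketch of the initialization this hunk implies, assuming the v3-style pinecone client; the helper name is hypothetical:

from pinecone import Pinecone

def init_pinecone(pinecone_api_key: str, index_name: str = "ll144"):
    # Mirrors the hunk: build the client, then open a handle to the existing index.
    client = Pinecone(api_key=pinecone_api_key)
    return client.Index(index_name)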
@@ -103,10 +103,9 @@ def initialize_llm(api, model, groq_api_key, azure_api_key):
    )

 def load_pdf_data(chunk_size):
-    PDF_FILE_PATH = "policy.pdf"
     reader = PyMuPDFReader()
     file_extractor = {".pdf": reader}
-    documents = SimpleDirectoryReader(input_files=[
+    documents = SimpleDirectoryReader(input_files=['LL144.pdf', 'LL144_Definitions.pdf'], file_extractor=file_extractor).load_data()
     return documents

 def create_index(documents, embedding_model_type="HF", embedding_model="BAAI/bge-large-en-v1.5", retriever_method="BM25", chunk_size=512):
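load_pdf_data now reads two fixed files instead of the single policy.pdf. A sketch of the loader, assuming the llama-index 0.10+ package layout (import paths differ in older releases) and that the two LL144 PDFs sit in the working directory:

from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import PyMuPDFReader

def load_pdf_data(chunk_size):
    # chunk_size is accepted here; the actual splitting happens when the index is built.
    reader = PyMuPDFReader()
    file_extractor = {".pdf": reader}  # route every .pdf through PyMuPDF
    documents = SimpleDirectoryReader(
        input_files=["LL144.pdf", "LL144_Definitions.pdf"],
        file_extractor=file_extractor,
    ).load_data()
    return documents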
@@ -227,7 +226,7 @@ def log_to_google_sheets(data):
    except Exception as e:
        logging.error(f"Error logging data to Google Sheets: {e}")

-def update_google_sheets(question_id, feedback=None, detailed_feedback=None):
+def update_google_sheets(question_id, feedback=None, detailed_feedback=None, annotated_answer=None):
    try:
        existing_data = sheet.get_all_values()
        headers = existing_data[0]
@@ -237,6 +236,8 @@ def update_google_sheets(question_id, feedback=None, detailed_feedback=None):
                sheet.update_cell(i+1, headers.index("Feedback") + 1, feedback)
            if detailed_feedback is not None:
                sheet.update_cell(i+1, headers.index("Detailed Feedback") + 1, detailed_feedback)
+           if annotated_answer is not None:
+               sheet.update_cell(i+1, headers.index("annotated_answer") + 1, annotated_answer)
            logging.info("Updated data in Google Sheets.")
            return
    except Exception as e:
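update_google_sheets gains an annotated_answer path next to the existing feedback columns. The lookup-and-update pattern it follows, sketched with gspread's 1-indexed update_cell; the "Question ID" header name is an assumption about the sheet schema:

def update_google_sheets(sheet, question_id, feedback=None,
                         detailed_feedback=None, annotated_answer=None):
    rows = sheet.get_all_values()              # gspread: entire sheet as lists of strings
    headers = rows[0]
    for i, row in enumerate(rows[1:], start=1):
        if row[headers.index("Question ID")] != question_id:
            continue
        # Sheet rows are 1-indexed and row 1 holds the headers, hence i + 1.
        if feedback is not None:
            sheet.update_cell(i + 1, headers.index("Feedback") + 1, feedback)
        if detailed_feedback is not None:
            sheet.update_cell(i + 1, headers.index("Detailed Feedback") + 1, detailed_feedback)
        if annotated_answer is not None:
            sheet.update_cell(i + 1, headers.index("annotated_answer") + 1, annotated_answer)
        return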
@@ -252,7 +253,6 @@ def run_streamlit_app():

    with col1:
        pinecone_api_key = st.text_input("Pinecone API Key")
-       parse_api_key = st.text_input("Parse API Key")
        azure_api_key = st.text_input("Azure API Key")
        groq_api_key = st.text_input("Groq API Key")

@@ -264,8 +264,8 @@ def run_streamlit_app():
            st.session_state['selected_api'] = 'groq'

    with col2:
-       selected_model = st.selectbox("Select Model", ["llama3-8b", "llama3-70b", "mixtral-8x7b", "gemma-7b", "gpt35"], key='selected_model', on_change=update_api_based_on_model)
-       selected_api = st.selectbox("Select API", ["azure", "groq"], key='selected_api', disabled=True)
+       selected_model = st.selectbox("Select Model", ["llama3-8b", "llama3-70b", "mixtral-8x7b", "gemma-7b", "gpt35"], index=4, key='selected_model', on_change=update_api_based_on_model)
+       selected_api = st.selectbox("Select API", ["azure", "groq"], index=0, key='selected_api', disabled=True)
        embedding_model_type = "HF"
        embedding_model = st.selectbox("Select Embedding Model", ["BAAI/bge-large-en-v1.5", "other_model"])
        retriever_method = st.selectbox("Select Retriever Method", ["Vector Search", "BM25", "BM25+Vector"])
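The two selectboxes now default to "gpt35" (index=4) and "azure" (index=0), with the API box disabled and driven by the model choice. A plausible sketch of the on_change callback; the mapping below is inferred from the surrounding session-state writes, not copied from this commit:

import streamlit as st

def update_api_based_on_model():
    # Keep the disabled "Select API" box in sync with the chosen model.
    if st.session_state['selected_model'] == 'gpt35':
        st.session_state['selected_api'] = 'azure'
    else:
        st.session_state['selected_api'] = 'groq'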
@@ -298,25 +298,35 @@ def run_streamlit_app():
            )
            st.markdown("### Answer")
            st.markdown(chat['response'])
-
+
+           col1, col2 = st.columns([1, 1])
            with col1:
-               if st.button("
-
-
-
-
+               if st.button("Annotate π", key=f"annotate_{chat_index}"):
+                   chat['annotate'] = True
+                   chat['feedback'] = -1
+                   st.session_state.chat_history[chat_index] = chat
+                   update_google_sheets(chat['id'], feedback=-1)
+                   st.rerun()
            with col2:
-               if st.button("
-
-
-                   st.session_state.chat_history[chat_index] = chat
-                   update_google_sheets(chat['id'], feedback=-1)
-           with col3:
-               feedback = st.text_area("How was the response? Does it match the context? Does it answer the question fully?", key=f"textarea_{chat_index}")
-               if st.button("Submit Feedback", key=f"submit_{chat_index}"):
-                   chat['detailed_feedback'] = feedback
+               if st.button("Approve π", key=f"approve_{chat_index}"):
+                   chat['approved'] = True
+                   chat['feedback'] = 1
                    st.session_state.chat_history[chat_index] = chat
-                   update_google_sheets(chat['id'],
+                   update_google_sheets(chat['id'], feedback=1, annotated_answer=chat['response'])
+
+           if chat.get('annotate', False):
+               annotated_answer = st.text_area("Annotate Answer", value=chat['response'], key=f"annotate_text_{chat_index}")
+               if st.button("Submit Annotated Answer", key=f"submit_annotate_{chat_index}"):
+                   chat['annotated_answer'] = annotated_answer
+                   chat['annotate'] = False
+                   st.session_state.chat_history[chat_index] = chat
+                   update_google_sheets(chat['id'], annotated_answer=annotated_answer)
+
+           feedback = st.text_area("How was the response? Does it match the context? Does it answer the question fully?", key=f"textarea_{chat_index}")
+           if st.button("Submit Feedback", key=f"submit_{chat_index}"):
+               chat['detailed_feedback'] = feedback
+               st.session_state.chat_history[chat_index] = chat
+               update_google_sheets(chat['id'], detailed_feedback=feedback)

    if question := st.chat_input("Enter your question"):
        if st.session_state.query_engine:
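This hunk swaps the old per-column feedback buttons for an Annotate/Approve pair plus a conditional editor. The round trip relies on a flag stored in the chat dict and st.rerun(): the button flips the flag, the script reruns, and the flag reveals the text area on the next pass. A stripped-down, standalone sketch of that pattern (stand-in state only, no Sheets calls):

import streamlit as st

if 'chat' not in st.session_state:
    st.session_state['chat'] = {'response': 'draft answer', 'annotate': False}
chat = st.session_state['chat']

if st.button("Annotate"):
    chat['annotate'] = True       # survives the rerun because chat lives in session_state
    st.rerun()

if chat['annotate']:
    edited = st.text_area("Annotate Answer", value=chat['response'])
    if st.button("Submit Annotated Answer"):
        chat['response'] = edited
        chat['annotate'] = False
        st.rerun()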
@@ -329,10 +339,10 @@ def run_streamlit_app():
            logging.info(f"Retrieved contexts: {[node.text for node in response.source_nodes]}")
            question_id = str(uuid.uuid4())
            timestamp = datetime.now().isoformat()
-           st.session_state.chat_history.append({'id': question_id, 'user': question, 'response': response.response, 'contexts': response.source_nodes, 'feedback': 0, 'detailed_feedback': '', 'timestamp': timestamp})
+           st.session_state.chat_history.append({'id': question_id, 'user': question, 'response': response.response, 'contexts': response.source_nodes, 'feedback': 0, 'detailed_feedback': '', 'annotated_answer': '', 'timestamp': timestamp})

            # Log initial query and response to Google Sheets without feedback
-           log_to_google_sheets([question_id, question, response.response, st.session_state['selected_api'], selected_model, embedding_model, retriever_method, chunk_size, top_k, 0, "", timestamp])
+           log_to_google_sheets([question_id, question, response.response, st.session_state['selected_api'], selected_model, embedding_model, retriever_method, chunk_size, top_k, 0, "", "", timestamp])

            st.rerun()
        else:
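Both the chat-history entry and the Sheets row gain an empty annotated_answer slot so later updates land in an existing column. A sketch of the initial append, assuming a gspread worksheet and the column order used in the call above (the helper name and header layout are assumptions):

def log_initial_row(sheet, question_id, question, answer, api, model, embedding_model,
                    retriever_method, chunk_size, top_k, timestamp):
    # Feedback (0), Detailed Feedback ("") and annotated_answer ("") start empty
    # and are filled in later by update_google_sheets.
    sheet.append_row([question_id, question, answer, api, model, embedding_model,
                      retriever_method, chunk_size, top_k, 0, "", "", timestamp])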