Spaces:

jskinner215
/

TAPAS_WTQ_Chunking

Build error

App Files Files Community

jskinner215 committed on Sep 10, 2023

Commit

3b3c852

1 Parent(s): f790556

added debugging features

Browse files

Files changed (1) hide show

app.py +39 -4

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from copy import deepcopy
 import streamlit as st
 import pandas as pd
 from io import StringIO
@@ -8,7 +9,13 @@ import weaviate
 from weaviate.embedded import EmbeddedOptions
 from weaviate import Client
 from weaviate.util import generate_uuid5
 # Initialize TAPAS model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
 model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
@@ -18,6 +25,22 @@ client = weaviate.Client(
   embedded_options=EmbeddedOptions()
 )
 # Function to check if a class already exists in Weaviate
 def class_exists(class_name):
     try:
@@ -76,6 +99,8 @@ def ingest_data_to_weaviate(dataframe, class_name, class_description):
         }
         client.data_object.create(obj)
 def query_weaviate(question):
     # This is a basic example; adapt the query based on the question
@@ -87,10 +112,12 @@ def ask_llm_chunk(chunk, questions):
     try:
         inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
     except Exception as e:
         st.write(f"An error occurred: {e}")
         return ["Error occurred while tokenizing"] * len(questions)
     if inputs["input_ids"].shape[1] > 512:
         st.warning("Token limit exceeded for chunk")
         return ["Token limit exceeded for chunk"] * len(questions)
@@ -106,13 +133,11 @@ def ask_llm_chunk(chunk, questions):
         if len(coordinates) == 1:
             row, col = coordinates[0]
             try:
-                st.write(f"DataFrame shape: {chunk.shape}")  # Debugging line
-                st.write(f"DataFrame columns: {chunk.columns}")  # Debugging line
-                st.write(f"Trying to access row {row}, col {col}")  # Debugging line
                 value = chunk.iloc[row, col]
-                st.write(f"Value accessed: {value}")  # Debugging line
                 answers.append(value)
             except Exception as e:
                 st.write(f"An error occurred: {e}")
         else:
             cell_values = []
@@ -122,6 +147,7 @@ def ask_llm_chunk(chunk, questions):
                     value = chunk.iloc[row, col]
                     cell_values.append(value)
                 except Exception as e:
                     st.write(f"An error occurred: {e}")
             answers.append(", ".join(map(str, cell_values)))
@@ -180,6 +206,9 @@ if selected_class != "New Class":
 if csv_file is not None:
     data = csv_file.read().decode("utf-8")
     dataframe = pd.read_csv(StringIO(data))
     # Display the uploaded CSV data
     st.write("Uploaded CSV Data:")
@@ -207,6 +236,12 @@ if csv_file is not None:
                 st.write(f"Question: {q}")
                 st.write(f"Answer: {a}")
 # Add Ctrl+Enter functionality for submitting the questions
 st.markdown("""
     <script>

 from copy import deepcopy
+from langchain.callbacks import StreamlitCallbackHandler
 import streamlit as st
 import pandas as pd
 from io import StringIO
 from weaviate.embedded import EmbeddedOptions
 from weaviate import Client
 from weaviate.util import generate_uuid5
+import logging
+class StreamlitCallbackHandler(logging.Handler):
+    def emit(self, record):
+        log_entry = self.format(record)
+        st.write(log_entry)
 # Initialize TAPAS model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
 model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
   embedded_options=EmbeddedOptions()
 )
+# Global list to store debugging information
+DEBUG_LOGS = []
+def log_debug_info(message):
+    if st.session_state.debug:
+        logger = logging.getLogger(__name__)
+        logger.setLevel(logging.DEBUG)
+        # Check if StreamlitCallbackHandler is already added to avoid duplicate logs
+        if not any(isinstance(handler, StreamlitCallbackHandler) for handler in logger.handlers):
+            handler = StreamlitCallbackHandler()
+            logger.addHandler(handler)
+        logger.debug(message)
 # Function to check if a class already exists in Weaviate
 def class_exists(class_name):
     try:
         }
         client.data_object.create(obj)
+    # Log data ingestion
+    log_debug_info(f"Data ingested into Weaviate for class: {class_name}")
 def query_weaviate(question):
     # This is a basic example; adapt the query based on the question
     try:
         inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
     except Exception as e:
+        log_debug_info(f"Tokenization error: {e}")
         st.write(f"An error occurred: {e}")
         return ["Error occurred while tokenizing"] * len(questions)
     if inputs["input_ids"].shape[1] > 512:
+        log_debug_info("Token limit exceeded for chunk")
         st.warning("Token limit exceeded for chunk")
         return ["Token limit exceeded for chunk"] * len(questions)
         if len(coordinates) == 1:
             row, col = coordinates[0]
             try:
                 value = chunk.iloc[row, col]
+                log_debug_info(f"Accessed value for row {row}, col {col}: {value}")
                 answers.append(value)
             except Exception as e:
+                log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
                 st.write(f"An error occurred: {e}")
         else:
             cell_values = []
                     value = chunk.iloc[row, col]
                     cell_values.append(value)
                 except Exception as e:
+                    log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
                     st.write(f"An error occurred: {e}")
             answers.append(", ".join(map(str, cell_values)))
 if csv_file is not None:
     data = csv_file.read().decode("utf-8")
     dataframe = pd.read_csv(StringIO(data))
+    # Log CSV upload information
+    log_debug_info(f"CSV uploaded with shape: {dataframe.shape}")
     # Display the uploaded CSV data
     st.write("Uploaded CSV Data:")
                 st.write(f"Question: {q}")
                 st.write(f"Answer: {a}")
+# Display debugging information
+# Optional panel: when the checkbox is ticked, print a header and then every
+# entry held in DEBUG_LOGS, one st.write call per entry.
+if st.checkbox("Show Debugging Information"):
+    st.write("Debugging Logs:")
+    for log in DEBUG_LOGS:
+        st.write(log)
 # Add Ctrl+Enter functionality for submitting the questions
 st.markdown("""
     <script>