jskinner215 committed on
Commit
3b3c852
·
1 Parent(s): f790556

added debugging features

Browse files
Files changed (1) hide show
  1. app.py +39 -4
app.py CHANGED
@@ -1,4 +1,5 @@
1
  from copy import deepcopy
 
2
  import streamlit as st
3
  import pandas as pd
4
  from io import StringIO
@@ -8,7 +9,13 @@ import weaviate
8
  from weaviate.embedded import EmbeddedOptions
9
  from weaviate import Client
10
  from weaviate.util import generate_uuid5
 
11
 
 
 
 
 
 
12
  # Initialize TAPAS model and tokenizer
13
  tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
14
  model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
@@ -18,6 +25,22 @@ client = weaviate.Client(
18
  embedded_options=EmbeddedOptions()
19
  )
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Function to check if a class already exists in Weaviate
22
  def class_exists(class_name):
23
  try:
@@ -76,6 +99,8 @@ def ingest_data_to_weaviate(dataframe, class_name, class_description):
76
  }
77
  client.data_object.create(obj)
78
 
 
 
79
 
80
  def query_weaviate(question):
81
  # This is a basic example; adapt the query based on the question
@@ -87,10 +112,12 @@ def ask_llm_chunk(chunk, questions):
87
  try:
88
  inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
89
  except Exception as e:
 
90
  st.write(f"An error occurred: {e}")
91
  return ["Error occurred while tokenizing"] * len(questions)
92
 
93
  if inputs["input_ids"].shape[1] > 512:
 
94
  st.warning("Token limit exceeded for chunk")
95
  return ["Token limit exceeded for chunk"] * len(questions)
96
 
@@ -106,13 +133,11 @@ def ask_llm_chunk(chunk, questions):
106
  if len(coordinates) == 1:
107
  row, col = coordinates[0]
108
  try:
109
- st.write(f"DataFrame shape: {chunk.shape}") # Debugging line
110
- st.write(f"DataFrame columns: {chunk.columns}") # Debugging line
111
- st.write(f"Trying to access row {row}, col {col}") # Debugging line
112
  value = chunk.iloc[row, col]
113
- st.write(f"Value accessed: {value}") # Debugging line
114
  answers.append(value)
115
  except Exception as e:
 
116
  st.write(f"An error occurred: {e}")
117
  else:
118
  cell_values = []
@@ -122,6 +147,7 @@ def ask_llm_chunk(chunk, questions):
122
  value = chunk.iloc[row, col]
123
  cell_values.append(value)
124
  except Exception as e:
 
125
  st.write(f"An error occurred: {e}")
126
  answers.append(", ".join(map(str, cell_values)))
127
 
@@ -180,6 +206,9 @@ if selected_class != "New Class":
180
  if csv_file is not None:
181
  data = csv_file.read().decode("utf-8")
182
  dataframe = pd.read_csv(StringIO(data))
 
 
 
183
 
184
  # Display the uploaded CSV data
185
  st.write("Uploaded CSV Data:")
@@ -207,6 +236,12 @@ if csv_file is not None:
207
  st.write(f"Question: {q}")
208
  st.write(f"Answer: {a}")
209
 
 
 
 
 
 
 
210
  # Add Ctrl+Enter functionality for submitting the questions
211
  st.markdown("""
212
  <script>
 
1
  from copy import deepcopy
2
+ from langchain.callbacks import StreamlitCallbackHandler
3
  import streamlit as st
4
  import pandas as pd
5
  from io import StringIO
 
9
  from weaviate.embedded import EmbeddedOptions
10
  from weaviate import Client
11
  from weaviate.util import generate_uuid5
12
+ import logging
13
 
14
class StreamlitCallbackHandler(logging.Handler):
    """Logging handler that renders each formatted record on the Streamlit page.

    NOTE(review): this class has the same name as the
    ``StreamlitCallbackHandler`` imported from ``langchain.callbacks`` at the
    top of the file, so from this point on the name refers to this class and
    the imported one is no longer reachable -- confirm that import is unused.
    """

    def emit(self, record):
        """Format ``record`` and display the resulting text via ``st.write``.

        Any exception raised while formatting or writing is handed to
        ``handleError`` so that a logging call never aborts the caller.
        """
        try:
            log_entry = self.format(record)
            st.write(log_entry)
        except Exception:
            # Standard logging.Handler convention: report emit failures
            # through handleError() instead of letting them propagate out of
            # the logger.debug(...) call site.
            self.handleError(record)
18
+
19
  # Initialize TAPAS model and tokenizer
20
  tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
21
  model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
 
25
  embedded_options=EmbeddedOptions()
26
  )
27
 
28
+ # Global list to store debugging information
29
+ DEBUG_LOGS = []
30
+
31
def log_debug_info(message):
    """Record a debug message and, if debugging is enabled, echo it to the page.

    The message is always appended to the module-level ``DEBUG_LOGS`` list,
    which the "Show Debugging Information" section iterates over. When the
    ``debug`` entry of ``st.session_state`` is truthy, the message is also
    sent through the module logger, whose ``StreamlitCallbackHandler`` writes
    it to the page.

    Args:
        message: Text to record.
    """
    # Without this append DEBUG_LOGS stays empty and the debugging section
    # has nothing to display.
    DEBUG_LOGS.append(message)

    # Attribute access on session_state raises when the key was never set;
    # .get() treats a missing "debug" flag as "debugging off".
    if not st.session_state.get("debug", False):
        return

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # Attach the handler only once. The comparison is by class name rather
    # than isinstance(): the logger outlives a Streamlit rerun, while the
    # handler class is defined anew each time the script executes, so an
    # isinstance() check against the fresh class would miss handlers added
    # by earlier runs and attach duplicates.
    handler_name = StreamlitCallbackHandler.__name__
    already_attached = any(
        type(handler).__name__ == handler_name for handler in logger.handlers
    )
    if not already_attached:
        logger.addHandler(StreamlitCallbackHandler())

    logger.debug(message)
42
+
43
+
44
  # Function to check if a class already exists in Weaviate
45
  def class_exists(class_name):
46
  try:
 
99
  }
100
  client.data_object.create(obj)
101
 
102
+ # Log data ingestion
103
+ log_debug_info(f"Data ingested into Weaviate for class: {class_name}")
104
 
105
  def query_weaviate(question):
106
  # This is a basic example; adapt the query based on the question
 
112
  try:
113
  inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
114
  except Exception as e:
115
+ log_debug_info(f"Tokenization error: {e}")
116
  st.write(f"An error occurred: {e}")
117
  return ["Error occurred while tokenizing"] * len(questions)
118
 
119
  if inputs["input_ids"].shape[1] > 512:
120
+ log_debug_info("Token limit exceeded for chunk")
121
  st.warning("Token limit exceeded for chunk")
122
  return ["Token limit exceeded for chunk"] * len(questions)
123
 
 
133
  if len(coordinates) == 1:
134
  row, col = coordinates[0]
135
  try:
 
 
 
136
  value = chunk.iloc[row, col]
137
+ log_debug_info(f"Accessed value for row {row}, col {col}: {value}")
138
  answers.append(value)
139
  except Exception as e:
140
+ log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
141
  st.write(f"An error occurred: {e}")
142
  else:
143
  cell_values = []
 
147
  value = chunk.iloc[row, col]
148
  cell_values.append(value)
149
  except Exception as e:
150
+ log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
151
  st.write(f"An error occurred: {e}")
152
  answers.append(", ".join(map(str, cell_values)))
153
 
 
206
  if csv_file is not None:
207
  data = csv_file.read().decode("utf-8")
208
  dataframe = pd.read_csv(StringIO(data))
209
+
210
+ # Log CSV upload information
211
+ log_debug_info(f"CSV uploaded with shape: {dataframe.shape}")
212
 
213
  # Display the uploaded CSV data
214
  st.write("Uploaded CSV Data:")
 
236
  st.write(f"Question: {q}")
237
  st.write(f"Answer: {a}")
238
 
239
+ # Display debugging information
240
+ if st.checkbox("Show Debugging Information"):
241
+ st.write("Debugging Logs:")
242
+ for log in DEBUG_LOGS:
243
+ st.write(log)
244
+
245
  # Add Ctrl+Enter functionality for submitting the questions
246
  st.markdown("""
247
  <script>