Spaces:
Build error
Build error
jskinner215
committed on
Commit
·
ed7f9aa
1
Parent(s):
340cc83
Update app.py
Browse files
app.py
CHANGED
@@ -113,63 +113,63 @@ def query_weaviate(question):
|
|
113 |
results = client.query.get(class_name).with_near_text(question).do()
|
114 |
return results
|
115 |
|
116 |
-
def ask_llm_chunk(chunk, questions):
|
117 |
-
chunk = chunk.astype(str)
|
118 |
-
try:
|
119 |
-
inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
|
120 |
-
except Exception as e:
|
121 |
-
log_debug_info(f"Tokenization error: {e}")
|
122 |
-
st.write(f"An error occurred: {e}")
|
123 |
-
return ["Error occurred while tokenizing"] * len(questions)
|
124 |
-
|
125 |
-
|
126 |
-
log_debug_info("Token limit exceeded for chunk")
|
127 |
-
st.warning("Token limit exceeded for chunk")
|
128 |
-
return ["Token limit exceeded for chunk"] * len(questions)
|
129 |
-
|
130 |
-
outputs = model(**inputs)
|
131 |
-
predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
|
132 |
-
inputs,
|
133 |
-
outputs.logits.detach(),
|
134 |
-
outputs.logits_aggregation.detach()
|
135 |
-
)
|
136 |
-
|
137 |
-
answers = []
|
138 |
-
for coordinates in predicted_answer_coordinates:
|
139 |
-
|
140 |
-
row, col = coordinates[0]
|
141 |
-
try:
|
142 |
-
value = chunk.iloc[row, col]
|
143 |
-
log_debug_info(f"Accessed value for row {row}, col {col}: {value}")
|
144 |
-
answers.append(value)
|
145 |
-
except Exception as e:
|
146 |
-
|
147 |
-
st.write(f"An error occurred: {e}")
|
148 |
-
else:
|
149 |
-
cell_values = []
|
150 |
-
for coordinate in coordinates:
|
151 |
-
row, col = coordinate
|
152 |
-
|
153 |
-
|
154 |
-
cell_values.append(value)
|
155 |
-
except Exception as e:
|
156 |
-
log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
|
157 |
-
st.write(f"An error occurred: {e}")
|
158 |
-
|
159 |
-
|
160 |
-
return answers
|
161 |
|
162 |
-
MAX_ROWS_PER_CHUNK = 200
|
163 |
|
164 |
-
def summarize_map_reduce(data, questions):
|
165 |
-
dataframe = pd.read_csv(StringIO(data))
|
166 |
-
num_chunks = len(dataframe) // MAX_ROWS_PER_CHUNK + 1
|
167 |
-
dataframe_chunks = [deepcopy(chunk) for chunk in np.array_split(dataframe, num_chunks)]
|
168 |
-
|
169 |
-
for chunk in dataframe_chunks:
|
170 |
-
chunk_answers = ask_llm_chunk(chunk, questions)
|
171 |
-
all_answers.extend(chunk_answers)
|
172 |
-
return all_answers
|
173 |
|
174 |
def get_class_schema(class_name):
|
175 |
"""
|
|
|
113 |
results = client.query.get(class_name).with_near_text(question).do()
|
114 |
return results
|
115 |
|
116 |
+
#def ask_llm_chunk(chunk, questions):
|
117 |
+
# chunk = chunk.astype(str)
|
118 |
+
# try:
|
119 |
+
# inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
|
120 |
+
# except Exception as e:
|
121 |
+
# log_debug_info(f"Tokenization error: {e}")
|
122 |
+
# st.write(f"An error occurred: {e}")
|
123 |
+
# return ["Error occurred while tokenizing"] * len(questions)
|
124 |
+
#
|
125 |
+
## if inputs["input_ids"].shape[1] > 512:
|
126 |
+
# log_debug_info("Token limit exceeded for chunk")
|
127 |
+
# st.warning("Token limit exceeded for chunk")
|
128 |
+
# return ["Token limit exceeded for chunk"] * len(questions)#
|
129 |
+
#
|
130 |
+
# outputs = model(**inputs)
|
131 |
+
# predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
|
132 |
+
# inputs,
|
133 |
+
# outputs.logits.detach(),
|
134 |
+
# outputs.logits_aggregation.detach()
|
135 |
+
# )
|
136 |
+
#
|
137 |
+
# answers = []
|
138 |
+
# for coordinates in predicted_answer_coordinates:
|
139 |
+
# if len(coordinates) == 1:
|
140 |
+
# row, col = coordinates[0]
|
141 |
+
# try:
|
142 |
+
# value = chunk.iloc[row, col]
|
143 |
+
# log_debug_info(f"Accessed value for row {row}, col {col}: {value}")
|
144 |
+
# answers.append(value)
|
145 |
+
# except Exception as e:
|
146 |
+
# log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
|
147 |
+
# st.write(f"An error occurred: {e}")
|
148 |
+
# else:
|
149 |
+
# cell_values = []
|
150 |
+
# for coordinate in coordinates:
|
151 |
+
# row, col = coordinate
|
152 |
+
# try:
|
153 |
+
# value = chunk.iloc[row, col]
|
154 |
+
# cell_values.append(value)
|
155 |
+
# except Exception as e:
|
156 |
+
# log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
|
157 |
+
# st.write(f"An error occurred: {e}")
|
158 |
+
# answers.append(", ".join(map(str, cell_values)))
|
159 |
+
#
|
160 |
+
# return answers
|
161 |
|
162 |
+
# MAX_ROWS_PER_CHUNK = 200
|
163 |
|
164 |
+
# def summarize_map_reduce(data, questions):
|
165 |
+
# dataframe = pd.read_csv(StringIO(data))
|
166 |
+
# num_chunks = len(dataframe) // MAX_ROWS_PER_CHUNK + 1
|
167 |
+
# dataframe_chunks = [deepcopy(chunk) for chunk in np.array_split(dataframe, num_chunks)]
|
168 |
+
# all_answers = []
|
169 |
+
# for chunk in dataframe_chunks:
|
170 |
+
# chunk_answers = ask_llm_chunk(chunk, questions)
|
171 |
+
# all_answers.extend(chunk_answers)
|
172 |
+
# return all_answers
|
173 |
|
174 |
def get_class_schema(class_name):
|
175 |
"""
|