jskinner215 committed on
Commit
ed7f9aa
·
1 Parent(s): 340cc83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -55
app.py CHANGED
@@ -113,63 +113,63 @@ def query_weaviate(question):
113
  results = client.query.get(class_name).with_near_text(question).do()
114
  return results
115
 
116
- def ask_llm_chunk(chunk, questions):
117
- chunk = chunk.astype(str)
118
- try:
119
- inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
120
- except Exception as e:
121
- log_debug_info(f"Tokenization error: {e}")
122
- st.write(f"An error occurred: {e}")
123
- return ["Error occurred while tokenizing"] * len(questions)
124
-
125
- if inputs["input_ids"].shape[1] > 512:
126
- log_debug_info("Token limit exceeded for chunk")
127
- st.warning("Token limit exceeded for chunk")
128
- return ["Token limit exceeded for chunk"] * len(questions)
129
-
130
- outputs = model(**inputs)
131
- predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
132
- inputs,
133
- outputs.logits.detach(),
134
- outputs.logits_aggregation.detach()
135
- )
136
-
137
- answers = []
138
- for coordinates in predicted_answer_coordinates:
139
- if len(coordinates) == 1:
140
- row, col = coordinates[0]
141
- try:
142
- value = chunk.iloc[row, col]
143
- log_debug_info(f"Accessed value for row {row}, col {col}: {value}")
144
- answers.append(value)
145
- except Exception as e:
146
- log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
147
- st.write(f"An error occurred: {e}")
148
- else:
149
- cell_values = []
150
- for coordinate in coordinates:
151
- row, col = coordinate
152
- try:
153
- value = chunk.iloc[row, col]
154
- cell_values.append(value)
155
- except Exception as e:
156
- log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
157
- st.write(f"An error occurred: {e}")
158
- answers.append(", ".join(map(str, cell_values)))
159
-
160
- return answers
161
 
162
- MAX_ROWS_PER_CHUNK = 200
163
 
164
- def summarize_map_reduce(data, questions):
165
- dataframe = pd.read_csv(StringIO(data))
166
- num_chunks = len(dataframe) // MAX_ROWS_PER_CHUNK + 1
167
- dataframe_chunks = [deepcopy(chunk) for chunk in np.array_split(dataframe, num_chunks)]
168
- all_answers = []
169
- for chunk in dataframe_chunks:
170
- chunk_answers = ask_llm_chunk(chunk, questions)
171
- all_answers.extend(chunk_answers)
172
- return all_answers
173
 
174
  def get_class_schema(class_name):
175
  """
 
113
  results = client.query.get(class_name).with_near_text(question).do()
114
  return results
115
 
116
+ #def ask_llm_chunk(chunk, questions):
117
+ # chunk = chunk.astype(str)
118
+ # try:
119
+ # inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
120
+ # except Exception as e:
121
+ # log_debug_info(f"Tokenization error: {e}")
122
+ # st.write(f"An error occurred: {e}")
123
+ # return ["Error occurred while tokenizing"] * len(questions)
124
+ #
125
+ ## if inputs["input_ids"].shape[1] > 512:
126
+ # log_debug_info("Token limit exceeded for chunk")
127
+ # st.warning("Token limit exceeded for chunk")
128
+ # return ["Token limit exceeded for chunk"] * len(questions)#
129
+ #
130
+ # outputs = model(**inputs)
131
+ # predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
132
+ # inputs,
133
+ # outputs.logits.detach(),
134
+ # outputs.logits_aggregation.detach()
135
+ # )
136
+ #
137
+ # answers = []
138
+ # for coordinates in predicted_answer_coordinates:
139
+ # if len(coordinates) == 1:
140
+ # row, col = coordinates[0]
141
+ # try:
142
+ # value = chunk.iloc[row, col]
143
+ # log_debug_info(f"Accessed value for row {row}, col {col}: {value}")
144
+ # answers.append(value)
145
+ # except Exception as e:
146
+ # log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
147
+ # st.write(f"An error occurred: {e}")
148
+ # else:
149
+ # cell_values = []
150
+ # for coordinate in coordinates:
151
+ # row, col = coordinate
152
+ # try:
153
+ # value = chunk.iloc[row, col]
154
+ # cell_values.append(value)
155
+ # except Exception as e:
156
+ # log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
157
+ # st.write(f"An error occurred: {e}")
158
+ # answers.append(", ".join(map(str, cell_values)))
159
+ #
160
+ # return answers
161
 
162
+ # MAX_ROWS_PER_CHUNK = 200
163
 
164
+ # def summarize_map_reduce(data, questions):
165
+ # dataframe = pd.read_csv(StringIO(data))
166
+ # num_chunks = len(dataframe) // MAX_ROWS_PER_CHUNK + 1
167
+ # dataframe_chunks = [deepcopy(chunk) for chunk in np.array_split(dataframe, num_chunks)]
168
+ # all_answers = []
169
+ # for chunk in dataframe_chunks:
170
+ # chunk_answers = ask_llm_chunk(chunk, questions)
171
+ # all_answers.extend(chunk_answers)
172
+ # return all_answers
173
 
174
  def get_class_schema(class_name):
175
  """