Spaces:

jskinner215
/

TAPAS_WTQ_Chunking

Build error

App Files Files Community

jskinner215 committed on Sep 10, 2023

Commit

ed7f9aa

1 Parent(s): 340cc83

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -55

app.py CHANGED Viewed

@@ -113,63 +113,63 @@ def query_weaviate(question):
     results = client.query.get(class_name).with_near_text(question).do()
     return results
-def ask_llm_chunk(chunk, questions):
-    chunk = chunk.astype(str)
-    try:
-        inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
-    except Exception as e:
-        log_debug_info(f"Tokenization error: {e}")
-        st.write(f"An error occurred: {e}")
-        return ["Error occurred while tokenizing"] * len(questions)
-    if inputs["input_ids"].shape[1] > 512:
-        log_debug_info("Token limit exceeded for chunk")
-        st.warning("Token limit exceeded for chunk")
-        return ["Token limit exceeded for chunk"] * len(questions)
-    outputs = model(**inputs)
-    predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
-        inputs,
-        outputs.logits.detach(),
-        outputs.logits_aggregation.detach()
-    )
-    answers = []
-    for coordinates in predicted_answer_coordinates:
-        if len(coordinates) == 1:
-            row, col = coordinates[0]
-            try:
-                value = chunk.iloc[row, col]
-                log_debug_info(f"Accessed value for row {row}, col {col}: {value}")
-                answers.append(value)
-            except Exception as e:
-                log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
-                st.write(f"An error occurred: {e}")
-        else:
-            cell_values = []
-            for coordinate in coordinates:
-                row, col = coordinate
-                try:
-                    value = chunk.iloc[row, col]
-                    cell_values.append(value)
-                except Exception as e:
-                    log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
-                    st.write(f"An error occurred: {e}")
-            answers.append(", ".join(map(str, cell_values)))
-    return answers
-MAX_ROWS_PER_CHUNK = 200
-def summarize_map_reduce(data, questions):
-    dataframe = pd.read_csv(StringIO(data))
-    num_chunks = len(dataframe) // MAX_ROWS_PER_CHUNK + 1
-    dataframe_chunks = [deepcopy(chunk) for chunk in np.array_split(dataframe, num_chunks)]
-    all_answers = []
-    for chunk in dataframe_chunks:
-        chunk_answers = ask_llm_chunk(chunk, questions)
-        all_answers.extend(chunk_answers)
-    return all_answers
 def get_class_schema(class_name):
     """

     results = client.query.get(class_name).with_near_text(question).do()
     return results
+#def ask_llm_chunk(chunk, questions):
+#    chunk = chunk.astype(str)
+#    try:
+#        inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
+#    except Exception as e:
+#        log_debug_info(f"Tokenization error: {e}")
+#        st.write(f"An error occurred: {e}")
+#        return ["Error occurred while tokenizing"] * len(questions)
+#
+ ##   if inputs["input_ids"].shape[1] > 512:
+#        log_debug_info("Token limit exceeded for chunk")
+#        st.warning("Token limit exceeded for chunk")
+#        return ["Token limit exceeded for chunk"] * len(questions)#
+#
+#    outputs = model(**inputs)
+#    predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
+#        inputs,
+#        outputs.logits.detach(),
+#        outputs.logits_aggregation.detach()
+#    )
+#
+#    answers = []
+#    for coordinates in predicted_answer_coordinates:
+ #       if len(coordinates) == 1:
+#            row, col = coordinates[0]
+#            try:
+#                value = chunk.iloc[row, col]
+#                log_debug_info(f"Accessed value for row {row}, col {col}: {value}")
+#                answers.append(value)
+#            except Exception as e:
+#               log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
+#                st.write(f"An error occurred: {e}")
+#        else:
+#            cell_values = []
+#            for coordinate in coordinates:
+#                row, col = coordinate
+#               try:
+#                   value = chunk.iloc[row, col]
+#                    cell_values.append(value)
+#                except Exception as e:
+#                    log_debug_info(f"Error accessing value for row {row}, col {col}: {e}")
+#                    st.write(f"An error occurred: {e}")
+#           answers.append(", ".join(map(str, cell_values)))
+#
+#    return answers
+# MAX_ROWS_PER_CHUNK = 200
+# def summarize_map_reduce(data, questions):
+#    dataframe = pd.read_csv(StringIO(data))
+#    num_chunks = len(dataframe) // MAX_ROWS_PER_CHUNK + 1
+#    dataframe_chunks = [deepcopy(chunk) for chunk in np.array_split(dataframe, num_chunks)]
+#   all_answers = []
+#    for chunk in dataframe_chunks:
+#        chunk_answers = ask_llm_chunk(chunk, questions)
+#        all_answers.extend(chunk_answers)
+#    return all_answers
 def get_class_schema(class_name):
     """