import streamlit as st
import pandas as pd
import numpy as np
from io import StringIO
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
# Initialize TAPAS model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
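# Note: this module-level load reruns on every Streamlit interaction, and the
# TAPAS large checkpoint is slow to load. A minimal sketch of one way to avoid
# that (assuming a Streamlit version that provides st.cache_resource) would be:
#
#     @st.cache_resource
#     def load_tapas():
#         tok = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
#         mdl = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
#         return tok, mdl
#
#     tokenizer, model = load_tapas()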
def ask_llm_chunk(chunk, questions):
    """Run TAPAS on a single dataframe chunk and return one answer per question."""
    # TAPAS expects every table cell to be a string.
    chunk = chunk.astype(str)
    try:
        inputs = tokenizer(table=chunk, queries=questions, padding="max_length", return_tensors="pt")
        # TAPAS accepts at most 512 tokens; skip chunks that do not fit.
        if inputs["input_ids"].shape[1] > 512:
            return ["Token limit exceeded for this chunk"] * len(questions)
        outputs = model(**inputs)
        predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
            inputs,
            outputs.logits.detach(),
            outputs.logits_aggregation.detach(),
        )
    except Exception as e:
        st.write(f"An error occurred: {e}")
        return ["Error processing this chunk"] * len(questions)

    answers = []
    for coordinates in predicted_answer_coordinates:
        if len(coordinates) == 1:
            # Single-cell answer: coordinates[0] is a (row, column) pair.
            answers.append(chunk.iat[coordinates[0]])
        else:
            # Multi-cell answer: join the referenced cell values.
            cell_values = [chunk.iat[coordinate] for coordinate in coordinates]
            answers.append(", ".join(cell_values))
    return answers
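# Usage sketch for the function above (hypothetical data, not part of the app flow):
#
#     demo = pd.DataFrame({"city": ["Paris", "Berlin"], "population": ["2100000", "3600000"]})
#     ask_llm_chunk(demo, ["Which city has the larger population?"])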
MAX_ROWS_PER_CHUNK = 50  # Kept small so each chunk stays within TAPAS's 512-token limit
def summarize_map_reduce(data, questions):
    """Split the CSV into chunks and ask every question against each chunk."""
    try:
        dataframe = pd.read_csv(StringIO(data))
    except Exception as e:
        st.write(f"Error reading the CSV file: {e}")
        return []
    # Split into roughly equal chunks of at most MAX_ROWS_PER_CHUNK rows each.
    num_chunks = len(dataframe) // MAX_ROWS_PER_CHUNK + 1
    dataframe_chunks = np.array_split(dataframe, num_chunks)
    all_answers = []
    for chunk in dataframe_chunks:
        chunk_answers = ask_llm_chunk(chunk, questions)
        all_answers.extend(chunk_answers)
    return all_answers
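# Note that this is only the "map" step: with more than one chunk, all_answers
# holds len(questions) answers per chunk, concatenated in chunk order, so the
# display loop below pairs each question with the first chunk's answer only.
# A hypothetical reduce step could regroup the flat list by question, e.g.:
#
#     def reduce_answers(all_answers, questions):
#         per_question = [[] for _ in questions]
#         for i, a in enumerate(all_answers):
#             per_question[i % len(questions)].append(a)
#         return ["; ".join(ans) for ans in per_question]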
st.title("TAPAS Table Question Answering")

# Upload CSV data
data = None  # Avoid a NameError if Submit is pressed before a file is uploaded.
csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
if csv_file is not None:
    data = csv_file.read().decode("utf-8")
    st.write("CSV Data Preview:")
    st.write(pd.read_csv(StringIO(data)).head())

# Input for questions
questions = st.text_area("Enter your questions (one per line)")
questions = questions.split("\n")  # Split questions by line
questions = [q.strip() for q in questions if q.strip()]  # Drop empty lines

if st.button("Submit"):
    if data and questions:
        try:
            answers = summarize_map_reduce(data, questions)
            st.write("Answers:")
            for q, a in zip(questions, answers):
                st.write(f"Question: {q}")
                st.write(f"Answer: {a}")
        except Exception as e:
            st.write(f"An error occurred: {e}")
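# To try the app locally (assuming this file is saved as app.py and that
# streamlit, transformers, torch, and pandas are installed):
#
#     streamlit run app.py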