jskinner215 committed on
Commit
0e62360
·
1 Parent(s): 45ee012

added embedded weaviate client and ingest_data + query_weaviate functions

Browse files

from copy import deepcopy
import streamlit as st
import pandas as pd
from io import StringIO
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
import numpy as np
import weaviate

# Initialize TAPAS model and tokenizer.
# Both are loaded from the "google/tapas-large-finetuned-wtq" checkpoint at
# module top level, so the load happens each time this script is executed.
tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")

# Initialize the Weaviate client used by the ingest/query helpers below.
# The client is pointed at the HTTP endpoint http://localhost:8080.
# NOTE(review): this was described as an "embedded" instance, but only a URL is
# passed here (no embedded options) — confirm that a Weaviate server is
# actually reachable at this address in the deployment environment.
client = weaviate.Client("http://localhost:8080")

# Function to ingest data into Weaviate
def ingest_data_to_weaviate(dataframe):
    """Store every row of *dataframe* as a ``YourClassName`` object in Weaviate.

    Each row becomes one object whose properties are the row's non-missing
    cells, keyed by column name. The object UUID is derived deterministically
    from the row index and content, so ingesting the same row again is a no-op
    instead of creating a duplicate.

    Args:
        dataframe: pandas DataFrame whose rows should be ingested.

    Raises:
        Whatever the Weaviate client raises for a failed request, except the
        "object already exists" error, which is treated as success.
    """
    import json
    import uuid

    from weaviate.exceptions import ObjectAlreadyExistsException

    class_name = "YourClassName"

    for index, row in dataframe.iterrows():
        # Missing cells are NaN after read_csv; NaN is not valid JSON, so those
        # properties are omitted rather than sent.
        # NOTE(review): column names are used as property names unchanged —
        # confirm they are valid Weaviate property names for the CSVs in use.
        properties = {
            str(column): value
            for column, value in row.to_dict().items()
            if not pd.isna(value)
        }

        # Weaviate object ids must be UUIDs; str(index) is not one. Hashing the
        # index together with the row content gives a stable, valid id.
        fingerprint = json.dumps(
            [str(index), properties], sort_keys=True, default=str
        )
        object_id = str(uuid.uuid5(uuid.NAMESPACE_URL, fingerprint))

        try:
            # The client takes the properties and the class name as separate
            # arguments, not wrapped in a single envelope dict.
            client.data_object.create(properties, class_name, uuid=object_id)
        except ObjectAlreadyExistsException:
            # Same row was ingested earlier; nothing to do.
            continue

# Function to query data from Weaviate
def query_weaviate(question):
    """Run a nearText search over ``YourClassName`` objects for *question*.

    Args:
        question: Natural-language question used as the search concept.

    Returns:
        The raw response dict produced by the client's ``.do()`` call. Matching
        objects, if any, are expected under ``["data"]["Get"]["YourClassName"]``.
    """
    class_name = "YourClassName"

    # A GraphQL Get query has to list the properties it wants back, so read the
    # property names from the class schema instead of hard-coding them.
    class_schema = client.schema.get(class_name)
    property_names = [prop["name"] for prop in class_schema.get("properties", [])]

    # nearText takes a dict with a "concepts" list, not a bare string.
    # NOTE(review): nearText presumably needs a text vectorizer module enabled
    # on the Weaviate instance — confirm for the target deployment.
    results = (
        client.query.get(class_name, property_names)
        .with_near_text({"concepts": [question]})
        .do()
    )
    return results

# Existing function to ask TAPAS
def ask_llm_chunk(chunk, questions):
# ... [rest of the function remains unchanged]

# Existing function to handle large datasets
def summarize_map_reduce(data, questions):
# ... [rest of the function remains unchanged]

st.title("TAPAS Table Question Answering with Weaviate")

# Upload CSV data
csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
if csv_file is not None:
    data = csv_file.read().decode("utf-8")
    dataframe = pd.read_csv(StringIO(data))

    # Ingest data into Weaviate, but only once per distinct upload: remember a
    # fingerprint of the last ingested content in the session state so that
    # re-running the script with the same file does not ingest it again.
    upload_fingerprint = hash(data)
    if st.session_state.get("ingested_upload") != upload_fingerprint:
        ingest_data_to_weaviate(dataframe)
        st.session_state["ingested_upload"] = upload_fingerprint

    st.write("CSV Data Preview:")
    st.write(dataframe.head())

    # Input for questions: one per line, blank lines dropped
    raw_questions = st.text_area("Enter your questions (one per line)")
    questions = [q for q in raw_questions.split("\n") if q]

    if st.button("Submit"):
        if data and questions:
            # Query Weaviate to get relevant data.
            # Only the first question drives retrieval; every question is then
            # asked against the retrieved rows.
            response = query_weaviate(questions[0])

            # The client returns a GraphQL envelope; the matching rows live
            # under data -> Get -> <class name>. Any level may be missing or
            # None (e.g. on a GraphQL error), so unwrap defensively.
            payload = (response or {}).get("data") or {}
            matches = (payload.get("Get") or {}).get("YourClassName") or []
            relevant_df = pd.DataFrame(matches)

            if relevant_df.empty:
                st.warning("No matching rows were returned from Weaviate.")
            else:
                # Pass the relevant data to TAPAS
                answers = summarize_map_reduce(relevant_df, questions)

                st.write("Answers:")
                for q, a in zip(questions, answers):
                    st.write(f"Question: {q}")
                    st.write(f"Answer: {a}")

# Add Ctrl+Enter functionality for submitting the questions.
# NOTE(review): this injects a <script> tag through st.markdown — confirm that
# Streamlit actually executes scripts injected this way.
st.markdown("""
<script>
document.addEventListener("DOMContentLoaded", function(event) {
document.addEventListener("keydown", function(event) {
if (event.ctrlKey && event.key === "Enter") {
document.querySelector(".stButton button").click();
}
});
});
</script>
""", unsafe_allow_html=True)

Files changed (1) hide show
  1. app.py +36 -5
app.py CHANGED
@@ -1,15 +1,34 @@
1
  from copy import deepcopy
2
-
3
  import streamlit as st
4
  import pandas as pd
5
  from io import StringIO
6
  from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
7
  import numpy as np
 
8
 
9
  # Initialize TAPAS model and tokenizer
10
  tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
11
  model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def ask_llm_chunk(chunk, questions):
14
  chunk = chunk.astype(str)
15
  try:
@@ -67,14 +86,19 @@ def summarize_map_reduce(data, questions):
67
  all_answers.extend(chunk_answers)
68
  return all_answers
69
 
70
- st.title("TAPAS Table Question Answering")
71
 
72
  # Upload CSV data
73
  csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
74
  if csv_file is not None:
75
  data = csv_file.read().decode("utf-8")
 
 
 
 
 
76
  st.write("CSV Data Preview:")
77
- st.write(pd.read_csv(StringIO(data)).head())
78
 
79
  # Input for questions
80
  questions = st.text_area("Enter your questions (one per line)")
@@ -83,7 +107,14 @@ if csv_file is not None:
83
 
84
  if st.button("Submit"):
85
  if data and questions:
86
- answers = summarize_map_reduce(data, questions)
 
 
 
 
 
 
 
87
  st.write("Answers:")
88
  for q, a in zip(questions, answers):
89
  st.write(f"Question: {q}")
@@ -100,4 +131,4 @@ st.markdown("""
100
  });
101
  });
102
  </script>
103
- """, unsafe_allow_html=True)
 
1
  from copy import deepcopy
 
2
  import streamlit as st
3
  import pandas as pd
4
  from io import StringIO
5
  from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
6
  import numpy as np
7
+ import weaviate
8
 
9
  # Initialize TAPAS model and tokenizer
10
  tokenizer = AutoTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
11
  model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
12
 
13
+ # Initialize Weaviate client for the embedded instance
14
+ client = weaviate.Client("http://localhost:8080")
15
+
16
+ # Function to ingest data into Weaviate
17
+ def ingest_data_to_weaviate(dataframe):
18
+ for index, row in dataframe.iterrows():
19
+ obj = {
20
+ "class": "YourClassName",
21
+ "id": str(index),
22
+ "properties": row.to_dict()
23
+ }
24
+ client.data_object.create(obj)
25
+
26
+ # Function to query data from Weaviate
27
+ def query_weaviate(question):
28
+ # This is a basic example; adapt the query based on the question
29
+ results = client.query.get('YourClassName').with_near_text(question).do()
30
+ return results
31
+
32
  def ask_llm_chunk(chunk, questions):
33
  chunk = chunk.astype(str)
34
  try:
 
86
  all_answers.extend(chunk_answers)
87
  return all_answers
88
 
89
+ st.title("TAPAS Table Question Answering with Weaviate")
90
 
91
  # Upload CSV data
92
  csv_file = st.file_uploader("Upload a CSV file", type=["csv"])
93
  if csv_file is not None:
94
  data = csv_file.read().decode("utf-8")
95
+ dataframe = pd.read_csv(StringIO(data))
96
+
97
+ # Ingest data into Weaviate
98
+ ingest_data_to_weaviate(dataframe)
99
+
100
  st.write("CSV Data Preview:")
101
+ st.write(dataframe.head())
102
 
103
  # Input for questions
104
  questions = st.text_area("Enter your questions (one per line)")
 
107
 
108
  if st.button("Submit"):
109
  if data and questions:
110
+ # Query Weaviate to get relevant data
111
+ relevant_data = query_weaviate(questions[0]) # Example: using the first question
112
+ # Convert the relevant data to a DataFrame (you might need to adjust this based on the Weaviate response format)
113
+ relevant_df = pd.DataFrame(relevant_data)
114
+
115
+ # Pass the relevant data to TAPAS
116
+ answers = summarize_map_reduce(relevant_df, questions)
117
+
118
  st.write("Answers:")
119
  for q, a in zip(questions, answers):
120
  st.write(f"Question: {q}")
 
131
  });
132
  });
133
  </script>
134
+ """, unsafe_allow_html=True)