# app.py -- copied from the Hugging Face file view (raw / history / blame).
# Author: fschwartzer
# Commit: 92f07c6 (verified) -- "Update app.py"
# File size: 3.09 kB
import pandas as pd
import gradio as gr
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# GPT-2 tokenizer and language-model head, both from the 'gpt2' checkpoint.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Read the anomaly records and attach a 'Feedback' column that starts out as
# empty strings (rather than None) so every cell holds the same type.
df = pd.read_csv('anomalies.csv').assign(Feedback="")

# Preprocessing: turn both columns into display-ready strings.
# 'ds' becomes a YYYY-MM-DD date string; 'real' becomes a two-decimal string.
df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d')
df['real'] = df['real'].map("{:.2f}".format)
# Render one record as a structured natural language sentence
def tokenize_row(row):
    """Return a one-sentence description of a single expense record.

    Args:
        row: Any object indexable by the keys 'ds', 'Group' and 'real'
            (for example a DataFrame row or a dict). The values are
            interpolated as-is, so they should already be formatted.

    Returns:
        The sentence "On <ds>, the expense in the group '<Group>' was $<real>."
    """
    date, group, amount = row['ds'], row['Group'], row['real']
    return f"On {date}, the expense in the group '{group}' was ${amount}."
# Build the 'tokenized' column: tokenize_row is called once per row
# (axis=1) and its returned sentence is stored for that row. Despite the
# name, the column holds plain sentences, not model tokens.
df['tokenized'] = df.apply(tokenize_row, axis=1)
# Function to respond to questions with GPT-2
def _first_sentence(text):
    """Return *text* up to and including its first sentence-ending period.

    A period ends the sentence only when it is followed by whitespace or by
    the end of the text, so decimal amounts such as "$12.34" are not cut in
    half. If no such period exists, the stripped text plus "." is returned.
    """
    text = text.strip()
    for index, char in enumerate(text):
        if char == "." and (index + 1 == len(text) or text[index + 1].isspace()):
            return text[:index + 1]
    return text + "."


def answer_question_with_gpt(question):
    """Answer *question* with GPT-2, using the latest data rows as context.

    The prompt is built from the last 10 sentences of ``df['tokenized']``
    followed by the question and an "Answer:" cue. Only the text generated
    after the prompt is considered, and only its first sentence is returned.

    Args:
        question: The user's question as a string.

    Returns:
        The first sentence of the generated answer, "No answer found." when
        the model produces no text, or a short hint when the question is
        blank.
    """
    if not question or not question.strip():
        return "Please enter a question."

    # Limit the context to the last 10 entries to keep the prompt short.
    latest_entries = df['tokenized'].tail(10).tolist()
    prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"

    # A single prompt needs no padding. Padding it out to a fixed length
    # would make the model continue after a run of pad tokens instead of
    # directly after "Answer:".
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512)
    input_ids = inputs['input_ids']
    prompt_length = input_ids.shape[1]

    generated_ids = model.generate(
        input_ids,
        attention_mask=inputs['attention_mask'],
        max_new_tokens=100,  # same budget as the former "prompt length + 100"
        do_sample=True,  # temperature/top_p are ignored without sampling
        temperature=0.3,
        top_p=0.9,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 defines no pad token
    )

    # Decode only the newly generated tokens. Searching the full text for
    # "Answer:" would misfire when the question itself contains that marker.
    completion = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)
    if not completion.strip():
        return "No answer found."
    return _first_sentence(completion)
# Attach a feedback note to every row of a named group
def add_feedback(name, feedback):
    """Store *feedback* on all rows whose 'Group' value equals *name*.

    Args:
        name: Group name to look up in the 'Group' column of ``df``.
        feedback: Text written to the 'Feedback' column of matching rows.

    Returns:
        "Feedback successfully added." when the group exists, otherwise
        "Data not found in DataFrame." (in which case nothing is modified).
    """
    if name not in df['Group'].values:
        return "Data not found in DataFrame."

    group_rows = df['Group'] == name
    df.loc[group_rows, 'Feedback'] = feedback
    return "Feedback successfully added."
# Two-column Gradio page: question answering on the left, feedback on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Data Query and Feedback System")

    with gr.Row():
        # Left column: ask GPT-2 a question about the data.
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question")
            answer_output = gr.Textbox(label="Answer", interactive=False)
            ask_button = gr.Button("Ask")

        # Right column: attach feedback to a group by name.
        with gr.Column():
            name_input = gr.Textbox(label="Name for Feedback")
            feedback_input = gr.Textbox(label="Feedback")
            feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
            submit_button = gr.Button("Submit Feedback")

    # Event wiring: each button passes its inputs to a handler and shows the
    # returned string in the matching read-only textbox.
    ask_button.click(
        fn=answer_question_with_gpt,
        inputs=question_input,
        outputs=answer_output,
    )
    submit_button.click(
        fn=add_feedback,
        inputs=[name_input, feedback_input],
        outputs=feedback_result,
    )

# Start the web server for the interface.
demo.launch()