import pandas as pd
import gradio as gr
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Load the model and tokenizer for GPT-2
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
df = pd.read_csv('anomalies.csv')
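# The CSV is expected to contain at least the columns 'ds' (date), 'Group', and 'real' (expense amount),
# which the preprocessing and prompt-building steps below rely on.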
df['Feedback'] = "" # Initialize with empty strings instead of None for consistency
# Preprocessing steps
df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d') # Format the datetime values
df['real'] = df['real'].apply(lambda x: f"{x:.2f}") # Format the float values to two decimal places
# Convert each row into a structured natural language sentence
def tokenize_row(row):
    return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."
# Apply the tokenization function to each row
df['tokenized'] = df.apply(tokenize_row, axis=1)
# Function to respond to questions with GPT-2
def answer_question_with_gpt(question):
    # GPT-2 has no pad token by default; reuse the EOS token so tokenization and generation can pad safely
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Keep the prompt short to stay within GPT-2's 1024-token context window
    latest_entries = df['tokenized'].tail(10).tolist()  # Limit context to the last 10 entries
    prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"

    # Truncate overly long prompts, but do not pad to max_length: trailing pad tokens
    # immediately before generation would degrade the continuation
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    generated_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=input_ids.shape[1] + 100,
        do_sample=True,  # temperature and top_p only take effect when sampling is enabled
        temperature=0.3,
        top_p=0.9,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id
    )
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Extract the text after "Answer:" and keep only the first sentence
    response_part = generated_text.split("Answer:")[1] if "Answer:" in generated_text else "No answer found."
    final_response = response_part.split(".")[0].strip() + "."
    return final_response
# Function to add feedback
def add_feedback(name, feedback):
    global df
    if name in df['Group'].values:
        df.loc[df['Group'] == name, 'Feedback'] = feedback
        return "Feedback successfully added."
    else:
        return "Group not found in the data."
with gr.Blocks() as demo:
    gr.Markdown("# Data Query and Feedback System")
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question")
            answer_output = gr.Textbox(label="Answer", interactive=False)
            ask_button = gr.Button("Ask")
        with gr.Column():
            name_input = gr.Textbox(label="Name for Feedback")
            feedback_input = gr.Textbox(label="Feedback")
            feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
            submit_button = gr.Button("Submit Feedback")

    ask_button.click(fn=answer_question_with_gpt, inputs=question_input, outputs=answer_output)
    submit_button.click(fn=add_feedback, inputs=[name_input, feedback_input], outputs=feedback_result)

demo.launch()