import pandas as pd
import gradio as gr
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the model and tokenizer for GPT-2
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

df = pd.read_csv('anomalies.csv')
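# The CSV is expected to provide at least the columns 'ds' (date), 'real'
# (numeric expense value), and 'Group' (category name). Illustrative row,
# not taken from the source data:
#   ds,Group,real
#   2024-01-31,Marketing,1234.56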
df['Feedback'] = ""  # Initialize with empty strings instead of None for consistency

# Preprocessing steps
df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d')  # Format the datetime values
df['real'] = df['real'].apply(lambda x: f"{x:.2f}")  # Format the float values to two decimal places

# Convert each row into a structured natural language sentence
def tokenize_row(row):
    return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."

# Apply the tokenization function to each row
df['tokenized'] = df.apply(tokenize_row, axis=1)
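# Example of a resulting sentence (illustrative values, not from the source data):
#   "On 2024-01-31, the expense in the group 'Marketing' was $1234.56."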

# Answer a question with GPT-2, using the most recent rows as context
def answer_question_with_gpt(question):
    # GPT-2 has no pad token by default; reuse the EOS token so generation does not warn
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Keep the prompt short to stay within GPT-2's context window
    latest_entries = df['tokenized'].tail(10).tolist()  # Limit to the last 10 entries for context
    prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"
    # A single sequence needs no padding; truncate to 512 tokens so the prompt plus
    # the generated continuation stays within GPT-2's 1024-token limit
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512)
    attention_mask = inputs['attention_mask']
    input_ids = inputs['input_ids']

    generated_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=100,                    # generate up to 100 new tokens after the prompt
        do_sample=True,                        # required for temperature/top_p to take effect
        temperature=0.3,
        top_p=0.9,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id
    )
    
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    # Keep only the text after "Answer:" and return its first sentence
    response_part = generated_text.split("Answer:", 1)[1].strip() if "Answer:" in generated_text else "No answer found."
    final_response = response_part.split(".")[0] + "."
    return final_response

# Record feedback for a given group; 'name' is matched against the 'Group' column
def add_feedback(name, feedback):
    global df
    if name in df['Group'].values:
        df.loc[df['Group'] == name, 'Feedback'] = feedback
        return "Feedback successfully added."
    else:
        return "Group not found in DataFrame."

with gr.Blocks() as demo:
    gr.Markdown("# Data Query and Feedback System")
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question")
            answer_output = gr.Textbox(label="Answer", interactive=False)
            ask_button = gr.Button("Ask")
        with gr.Column():
            name_input = gr.Textbox(label="Name for Feedback")
            feedback_input = gr.Textbox(label="Feedback")
            feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
            submit_button = gr.Button("Submit Feedback")

    # Wire the buttons to their handler functions
    ask_button.click(fn=answer_question_with_gpt, inputs=question_input, outputs=answer_output)
    submit_button.click(fn=add_feedback, inputs=[name_input, feedback_input], outputs=feedback_result)

# Start the Gradio app; by default it serves locally at http://127.0.0.1:7860
demo.launch()