fschwartzer committed on
Commit
07fef94
·
verified ·
1 Parent(s): 75112d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -20
app.py CHANGED
@@ -1,49 +1,45 @@
1
  import pandas as pd
2
  import gradio as gr
3
- from transformers import GPT2Tokenizer, GPT2LMHeadModel
4
 
5
- # Load the model and tokenizer for GPT-2
6
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
7
- model = GPT2LMHeadModel.from_pretrained('gpt2')
8
 
 
9
  df = pd.read_csv('anomalies.csv')
10
- df['Feedback'] = "" # Initialize with empty strings instead of None for consistency
 
 
11
 
12
- # Preprocessing steps
13
- df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d') # Format the datetime values
14
- df['real'] = df['real'].apply(lambda x: f"{x:.2f}") # Format the float values to two decimal places
15
-
16
- # Convert each row into a structured natural language sentence
17
  def tokenize_row(row):
18
  return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."
19
 
20
- # Apply the tokenization function to each row
21
  df['tokenized'] = df.apply(tokenize_row, axis=1)
22
  print(df)
23
 
24
- # Function to respond to questions with GPT-2
25
- def answer_question_with_gpt(question):
26
  if tokenizer.pad_token is None:
27
  tokenizer.pad_token = tokenizer.eos_token
28
 
29
- # Simplify the prompt to avoid exceeding token limits
30
- latest_entries = df['tokenized'].tail(10).tolist() # Limit to the last 10 entries for context
31
  prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"
32
  inputs = tokenizer(prompt, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
33
  attention_mask = inputs['attention_mask']
34
  input_ids = inputs['input_ids']
35
-
36
  generated_ids = model.generate(
37
  input_ids,
38
  attention_mask=attention_mask,
39
- max_length=len(input_ids[0]) + 100,
40
- temperature=0.3,
41
  top_p=0.9,
42
  no_repeat_ngram_size=2
43
  )
44
 
45
  generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
46
- # Extract the response after "Answer:"
47
  response_part = generated_text.split("Answer:")[1] if "Answer:" in generated_text else "No answer found."
48
  final_response = response_part.split(".")[0] + "."
49
  return final_response
@@ -70,7 +66,7 @@ with gr.Blocks() as demo:
70
  feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
71
  submit_button = gr.Button("Submit Feedback")
72
 
73
- ask_button.click(fn=answer_question_with_gpt, inputs=question_input, outputs=answer_output)
74
  submit_button.click(fn=add_feedback, inputs=[name_input, feedback_input], outputs=feedback_result)
75
 
76
  demo.launch()
 
1
  import pandas as pd
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
+ # Load the model and tokenizer for Meta LLaMA 3.1
6
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
7
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
8
 
9
+ # Load and preprocess the DataFrame
10
  df = pd.read_csv('anomalies.csv')
11
+ df['Feedback'] = ""
12
+ df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d')
13
+ df['real'] = df['real'].apply(lambda x: f"{x:.2f}")
14
 
15
+ # Convert data rows to sentences
 
 
 
 
16
  def tokenize_row(row):
17
  return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."
18
 
 
19
  df['tokenized'] = df.apply(tokenize_row, axis=1)
20
  print(df)
21
 
22
+ # Function to generate a response based on the latest data entries
23
+ def answer_question_with_llama(question):
24
  if tokenizer.pad_token is None:
25
  tokenizer.pad_token = tokenizer.eos_token
26
 
27
+ latest_entries = df['tokenized'].tail(10).tolist()
 
28
  prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"
29
  inputs = tokenizer(prompt, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
30
  attention_mask = inputs['attention_mask']
31
  input_ids = inputs['input_ids']
32
+
33
  generated_ids = model.generate(
34
  input_ids,
35
  attention_mask=attention_mask,
36
+ max_length=512, # Adjusted to match typical model capacity
37
+ temperature=0.7, # Adjust temperature for diversity
38
  top_p=0.9,
39
  no_repeat_ngram_size=2
40
  )
41
 
42
  generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
 
43
  response_part = generated_text.split("Answer:")[1] if "Answer:" in generated_text else "No answer found."
44
  final_response = response_part.split(".")[0] + "."
45
  return final_response
 
66
  feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
67
  submit_button = gr.Button("Submit Feedback")
68
 
69
+ ask_button.click(fn=answer_question_with_llama, inputs=question_input, outputs=answer_output)
70
  submit_button.click(fn=add_feedback, inputs=[name_input, feedback_input], outputs=feedback_result)
71
 
72
  demo.launch()