fschwartzer committed
Commit 92f07c6 · verified · 1 Parent(s): c42e079

Update app.py

Files changed (1)
  1. app.py +27 -26
app.py CHANGED
@@ -2,12 +2,12 @@ import pandas as pd
 import gradio as gr
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 
-# Carregando o modelo e o tokenizador do GPT-2
+# Load the model and tokenizer for GPT-2
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 model = GPT2LMHeadModel.from_pretrained('gpt2')
 
 df = pd.read_csv('anomalies.csv')
-df['Feedback'] = None
+df['Feedback'] = "" # Initialize with empty strings instead of None for consistency
 
 # Preprocessing steps
 df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d') # Format the datetime values
@@ -20,12 +20,14 @@ def tokenize_row(row):
 # Apply the tokenization function to each row
 df['tokenized'] = df.apply(tokenize_row, axis=1)
 
-# Função para responder perguntas com GPT-2
+# Function to respond to questions with GPT-2
 def answer_question_with_gpt(question):
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
 
-    prompt = f"Considering the follow: {df['tokenized']}. Question: {question} Resposta:"
+    # Simplify the prompt to avoid exceeding token limits
+    latest_entries = df['tokenized'].tail(10).tolist() # Limit to the last 10 entries for context
+    prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"
     inputs = tokenizer(prompt, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
     attention_mask = inputs['attention_mask']
     input_ids = inputs['input_ids']
@@ -33,42 +35,41 @@ def answer_question_with_gpt(question):
     generated_ids = model.generate(
         input_ids,
         attention_mask=attention_mask,
-        max_length=len(input_ids[0]) + 100, # Aumentar o limite de geração
-        temperature=0.5, # Ajustar a criatividade
-        top_p=0.9, # Usar nucleus sampling
-        no_repeat_ngram_size=2 # Evitar repetições desnecessárias
+        max_length=len(input_ids[0]) + 100,
+        temperature=0.3,
+        top_p=0.9,
+        no_repeat_ngram_size=2
     )
 
     generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-    # Processando para extrair apenas a resposta após "Resposta:"
-    response_part = generated_text.split("Resposta:")[1] if "Resposta:" in generated_text else "Resposta não encontrada."
-    # Limpeza adicional para remover qualquer texto indesejado após a resposta
-    final_response = response_part.split(".")[0] + "." # Isso assume que a resposta termina na primeira sentença.
+    # Extract the response after "Answer:"
+    response_part = generated_text.split("Answer:")[1] if "Answer:" in generated_text else "No answer found."
+    final_response = response_part.split(".")[0] + "."
    return final_response
 
-# Função para adicionar feedback
-def add_feedback(nome, feedback):
+# Function to add feedback
+def add_feedback(name, feedback):
     global df
-    if grupo in df['Group'].values:
-        df.loc[df['Group'] == grupo, 'Feedback'] = feedback
-        return "Feedback adicionado com sucesso."
+    if name in df['Group'].values:
+        df.loc[df['Group'] == name, 'Feedback'] = feedback
+        return "Feedback successfully added."
     else:
-        return "Dado não encontrado no DataFrame."
+        return "Data not found in DataFrame."
 
 with gr.Blocks() as demo:
-    gr.Markdown("# Sistema de Consulta e Feedback de Dados")
+    gr.Markdown("# Data Query and Feedback System")
     with gr.Row():
         with gr.Column():
-            question_input = gr.Textbox(label="Faça uma Pergunta")
-            answer_output = gr.Textbox(label="Resposta", interactive=False)
-            ask_button = gr.Button("Perguntar")
+            question_input = gr.Textbox(label="Ask a Question")
+            answer_output = gr.Textbox(label="Answer", interactive=False)
+            ask_button = gr.Button("Ask")
         with gr.Column():
-            name_input = gr.Textbox(label="Nome para Feedback")
+            name_input = gr.Textbox(label="Name for Feedback")
             feedback_input = gr.Textbox(label="Feedback")
-            feedback_result = gr.Textbox(label="Resultado do Feedback", interactive=False)
-            submit_button = gr.Button("Enviar Feedback")
+            feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
+            submit_button = gr.Button("Submit Feedback")
 
     ask_button.click(fn=answer_question_with_gpt, inputs=question_input, outputs=answer_output)
     submit_button.click(fn=add_feedback, inputs=[name_input, feedback_input], outputs=feedback_result)
 
-demo.launch()
+demo.launch()
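
Note on the new prompt: limiting the context to the last 10 tokenized rows helps, but the tokenizer call still truncates the prompt to 512 tokens, and because the question is appended at the end of the prompt, right-side truncation can cut the question off entirely. A minimal sketch of a pre-flight check, not part of this commit, reusing the module-level tokenizer from app.py and the 512 limit used above:

# Sketch only: see how many tokens the prompt actually occupies before generation.
def prompt_token_count(prompt: str) -> int:
    # Uses the module-level `tokenizer` defined in app.py
    return tokenizer(prompt, return_tensors='pt')['input_ids'].shape[1]

# Hypothetical usage:
# n = prompt_token_count("Based on the following data: ... Question: ... Answer:")
# if n > 512, the tail of the prompt (including the question) will be dropped by truncation.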
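Note on the generation parameters: in Hugging Face transformers, temperature and top_p only affect the output when sampling is enabled; with the default greedy decoding they are ignored (recent versions warn about this). A sketch of how the call inside answer_question_with_gpt could enable sampling, assuming that is the intent behind temperature=0.3 and top_p=0.9 (this is an assumption, not what the commit does):

# Sketch only: same generate() call with sampling explicitly enabled.
generated_ids = model.generate(
    input_ids,
    attention_mask=attention_mask,
    max_length=len(input_ids[0]) + 100,
    do_sample=True,                       # required for temperature / top_p to take effect
    temperature=0.3,
    top_p=0.9,
    no_repeat_ngram_size=2,
    pad_token_id=tokenizer.eos_token_id,  # avoids the missing pad_token_id warning
)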
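Note on add_feedback: although the textbox is labelled "Name for Feedback", the function matches the submitted value against the Group column of anomalies.csv, and feedback is stored only on an exact match. A hypothetical usage example (the group id "G1" is made up; real values come from df['Group']):

print(add_feedback("G1", "False positive, promotional spike"))  # "Feedback successfully added."
print(add_feedback("no-such-group", "test"))                    # "Data not found in DataFrame."

The update also only touches the in-memory DataFrame; persisting it would require an explicit write such as df.to_csv('anomalies.csv', index=False), which this commit does not add.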