Update app.py
app.py CHANGED
@@ -6,24 +6,26 @@ from transformers import GPT2Tokenizer, GPT2LMHeadModel
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 model = GPT2LMHeadModel.from_pretrained('gpt2')
 
-# Initial data
-data = {
-    'Nome': ['Alice', 'Bob', 'Charlie'],
-    'Idade': [25, 30, 35],
-    'Cidade': ['Nova York', 'Los Angeles', 'Chicago'],
-    'Feedback': [None, None, None]
-}
-#df = pd.DataFrame(data)
-
 df = pd.read_csv('anomalies.csv')
 df['Feedback'] = None
 
+# Preprocessing steps
+df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d')  # Format the datetime values
+df['real'] = df['real'].apply(lambda x: f"{x:.2f}")  # Format the float values to two decimal places
+
+# Convert each row into a structured natural language sentence
+def tokenize_row(row):
+    return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."
+
+# Apply the tokenization function to each row
+df['tokenized'] = df.apply(tokenize_row, axis=1)
+
 # Function to answer questions with GPT-2
 def answer_question_with_gpt(question):
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
 
-    prompt = f"Considerando
+    prompt = f"Considerando as seguintes sentenças: {df['tokenized'].to_string(index=False)}. Pergunta: {question} Resposta:"
     inputs = tokenizer(prompt, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
     attention_mask = inputs['attention_mask']
     input_ids = inputs['input_ids']
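For reference, here is a minimal sketch of what the new preprocessing produces, run on a hypothetical one-row frame with the 'ds', 'Group', and 'real' columns the commit assumes anomalies.csv to have (the sample values are invented):

import pandas as pd

def tokenize_row(row):
    return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."

df = pd.DataFrame({'ds': ['2024-01-05'], 'Group': ['Marketing'], 'real': [1234.5]})
df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d')
df['real'] = df['real'].apply(lambda x: f"{x:.2f}")
df['tokenized'] = df.apply(tokenize_row, axis=1)
print(df['tokenized'].iloc[0])
# On 2024-01-05, the expense in the group 'Marketing' was $1234.50.

Every row's sentence is then concatenated into the prompt via df['tokenized'].to_string(index=False), so the prompt grows with the CSV; with truncation=True and max_length=512, longer tables will simply be cut off by the tokenizer.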
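The hunk ends at input_ids, so the rest of answer_question_with_gpt is not shown. A plausible continuation, assuming a standard transformers generate-and-decode step (the max_new_tokens budget and the answer-splitting logic below are illustrative assumptions, not part of the commit):

    # Hypothetical continuation: generate a completion and keep only the answer.
    output_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=60,                    # assumed budget for the answer
        pad_token_id=tokenizer.pad_token_id,
    )
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # The prompt ends with "Resposta:" ("Answer:"), so take what follows it.
    return text.split("Resposta:")[-1].strip()

One caveat on the tokenizer settings the commit already uses: GPT-2 pads on the right by default, and for decoder-only generation left padding (tokenizer.padding_side = 'left') generally gives better results with padded inputs.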