import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Load your CSV file into a pandas DataFrame
df = pd.read_csv("dishTV_customer_service_with_address_and_rules_accurate_v2.csv")

# Print column names and first few rows to verify the data structure
print(df.columns)
print(df.head())

# Create a conversation column by merging the agent's and customer's utterances
df['Conversation'] = df['Agent Utterance'] + " " + df['Customer Utterance']

# Map Yes/No to 1/0 for the three binary targets
df['Rule Followed'] = df['Rule Followed'].map({'Yes': 1, 'No': 0})
df['Question Asked'] = df['Question Asked'].map({'Yes': 1, 'No': 0})
df['Question Answered'] = df['Question Answered'].map({'Yes': 1, 'No': 0})

# Split data into training and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['Conversation'].tolist(),
    df[['Rule Followed', 'Question Asked', 'Question Answered']].values,
    test_size=0.2,
    random_state=42  # fixed seed so the split is reproducible
)

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the conversations
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=128)

# Create Hugging Face datasets; labels are float32 because multi-label
# classification uses BCEWithLogitsLoss, which expects float targets
train_dataset = Dataset.from_dict({
    'input_ids': train_encodings['input_ids'],
    'attention_mask': train_encodings['attention_mask'],
    'labels': train_labels.astype(np.float32)
})
val_dataset = Dataset.from_dict({
    'input_ids': val_encodings['input_ids'],
    'attention_mask': val_encodings['attention_mask'],
    'labels': val_labels.astype(np.float32)
})

# Initialize BERT for multi-label classification (3 independent binary labels).
# Setting problem_type makes the model use BCEWithLogitsLoss instead of cross-entropy.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=3,
    problem_type='multi_label_classification'
)

# Multi-label "exact match" accuracy: a sample counts as correct only if all
# three labels are right. Note that argmax is wrong here (it picks a single
# class); instead we threshold each logit at 0, which is equivalent to
# sigmoid(logit) > 0.5.
def compute_metrics(p):
    preds = (p.predictions > 0).astype(int)
    return {'accuracy': np.mean(np.all(preds == p.label_ids, axis=1))}

# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy='epoch',  # Evaluate at the end of each epoch
    save_strategy='epoch',  # Save a checkpoint at the end of each epoch
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=500,
    # save_steps is dropped: it is ignored unless save_strategy='steps'
    load_best_model_at_end=True,
    metric_for_best_model='accuracy'  # Trainer prefixes this to 'eval_accuracy'
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

# Start training
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")
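# Optional: persist the fine-tuned model and tokenizer so inference can run
# later without retraining. A minimal sketch; the './fine_tuned_bert' path is
# an arbitrary choice, not something from the original script. Because
# load_best_model_at_end=True, save_model() writes the best checkpoint.
trainer.save_model('./fine_tuned_bert')
tokenizer.save_pretrained('./fine_tuned_bert')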
# Define a new conversation for testing
new_conversation = ["Hello! How can I assist you today? I just wanted to check the status of my account."]

# Tokenize the new conversation (same max_length as training for consistency)
test_encodings = tokenizer(new_conversation, truncation=True, padding=True, max_length=128, return_tensors='pt')

# Move the inputs to the same device as the model (the Trainer may have moved it to GPU)
test_encodings = {k: v.to(model.device) for k, v in test_encodings.items()}

# Make predictions
model.eval()
with torch.no_grad():
    outputs = model(**test_encodings)
    predictions = torch.sigmoid(outputs.logits).cpu().numpy()  # Sigmoid for multi-label classification

# Display the per-label probabilities
print(f"Predictions (Rule Followed, Question Asked, Question Answered): {predictions}")

# Round probabilities to 0/1 (each label is an independent binary decision)
predictions_rounded = np.round(predictions)
print(f"Predictions (rounded): {predictions_rounded}")
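# A small readability helper (not in the original script): map each rounded
# prediction back to its label name so the output is self-describing.
label_names = ['Rule Followed', 'Question Asked', 'Question Answered']
for name, prob, flag in zip(label_names, predictions[0], predictions_rounded[0]):
    print(f"{name}: {'Yes' if flag == 1 else 'No'} (probability {prob:.3f})")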