import torch
import random
import numpy as np
from tqdm import tqdm
from datasets import load_dataset
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
from torch.utils.data import DataLoader
from torch.optim import AdamW  # transformers.AdamW is deprecated/removed; use the PyTorch implementation
from sklearn.metrics import r2_score, f1_score, mean_absolute_error

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Load the DEITA-Complexity dataset
dataset = load_dataset("hkust-nlp/deita-complexity-scorer-data")
val_data = dataset["validation"]

# Initialize the tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

# Preprocessing function: tokenize the raw prompt text
def preprocess_function(examples):
    return tokenizer(examples["input"], truncation=True, padding="max_length", max_length=128)

# Tokenize the validation dataset
val_encodings = val_data.map(preprocess_function, batched=True)

# Inspect the structure of val_encodings
print("Validation Encodings Structure:")
print(val_encodings)

# Wrap the tokenized dataset in a PyTorch Dataset
class ComplexityDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __len__(self):
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        return {
            "input_ids": torch.tensor(self.encodings["input_ids"][idx]),
            "attention_mask": torch.tensor(self.encodings["attention_mask"][idx]),
            # The 'target' column holds the complexity score; cast to float
            # in case it is stored as a string.
            "labels": torch.tensor(float(self.encodings["target"][idx]), dtype=torch.float),
        }

val_dataset = ComplexityDataset(val_encodings)

# Load pre-trained DistilBERT with a single regression head (num_labels=1)
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=1)

# Freeze the first 4 of DistilBERT's 6 transformer layers
for layer in model.distilbert.transformer.layer[:4]:
    for param in layer.parameters():
        param.requires_grad = False

# Define the optimizer (used by the optional fine-tuning sketch below)
optimizer = AdamW(model.parameters(), lr=2e-5)

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# DataLoader for batching
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
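# NOTE: the script defines an optimizer and freezes layers but never runs a
# training step, so the model evaluated below is the raw pre-trained checkpoint.
# A minimal fine-tuning loop might look like the sketch below; it assumes the
# dataset also provides a "train" split with the same columns, and it is left
# commented out so the script's behavior is unchanged.
#
# train_encodings = dataset["train"].map(preprocess_function, batched=True)
# train_loader = DataLoader(ComplexityDataset(train_encodings), batch_size=8, shuffle=True)
# model.train()
# for epoch in range(3):
#     for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}"):
#         batch = {key: val.to(device) for key, val in batch.items()}
#         optimizer.zero_grad()
#         outputs = model(**batch)  # with num_labels=1 and float labels, HF computes an MSE loss
#         outputs.loss.backward()
#         optimizer.step()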
# Evaluation function: MSE loss, MAE, R², and a weighted F1 over rounded scores
def evaluate_model(model, val_loader):
    model.eval()
    val_loss = 0.0
    total_mae = 0.0
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Evaluating", leave=False):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)
            # squeeze(-1) keeps the batch dimension even when the last batch has size 1
            preds = outputs.logits.squeeze(-1)
            loss = torch.nn.functional.mse_loss(preds, batch["labels"])
            val_loss += loss.item()
            total_mae += torch.nn.functional.l1_loss(preds, batch["labels"], reduction="sum").item()
            all_predictions.extend(preds.cpu().numpy())
            all_labels.extend(batch["labels"].cpu().numpy())

    avg_val_loss = val_loss / len(val_loader)
    avg_val_mae = total_mae / len(val_loader.dataset)

    # Additional metrics: R² on the raw scores, F1 on rounded (discretized) scores
    r2 = r2_score(all_labels, all_predictions)
    f1 = f1_score(np.round(all_labels), np.round(all_predictions), average="weighted")
    return avg_val_loss, avg_val_mae, r2, f1, all_predictions, all_labels

# Evaluate the model
val_loss, val_mae, r2, f1, predictions, labels = evaluate_model(model, val_loader)
print(f"Validation Loss = {val_loss:.4f}, Validation MAE = {val_mae:.4f}, R² Score = {r2:.4f}, F1 Score = {f1:.4f}")
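# The sklearn mean_absolute_error import is otherwise unused; as a quick
# sanity check, it should agree with the manually accumulated MAE above.
sklearn_mae = mean_absolute_error(labels, predictions)
print(f"Cross-check MAE (sklearn): {sklearn_mae:.4f}")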
# Inference on the validation set (collect raw predictions and labels)
def test_model(model, val_loader):
    model.eval()
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Testing", leave=False):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)
            all_predictions.extend(outputs.logits.squeeze(-1).cpu().numpy())
            all_labels.extend(batch["labels"].cpu().numpy())
    return np.array(all_predictions), np.array(all_labels)

# Get predictions and labels from the test function
test_predictions, test_labels = test_model(model, val_loader)

# Evaluation metrics on the test predictions
test_r2 = r2_score(test_labels, test_predictions)
test_f1 = f1_score(np.round(test_labels), np.round(test_predictions), average="weighted")
print(f"Test R² Score = {test_r2:.4f}, Test F1 Score = {test_f1:.4f}")

# Save the model and tokenizer
model.save_pretrained("fine_tuned_deita_model")
tokenizer.save_pretrained("fine_tuned_deita_model")
print("✅ Evaluation and testing complete! Model saved at 'fine_tuned_deita_model'.")
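# Sanity check after saving: reload the artifacts and score a single prompt.
# This is an illustrative sketch; the prompt text below is an arbitrary example.
reloaded_tokenizer = DistilBertTokenizerFast.from_pretrained("fine_tuned_deita_model")
reloaded_model = DistilBertForSequenceClassification.from_pretrained("fine_tuned_deita_model").to(device)
reloaded_model.eval()
sample = reloaded_tokenizer(
    "How do I implement a thread-safe LRU cache in Python?",
    truncation=True, padding="max_length", max_length=128, return_tensors="pt",
).to(device)
with torch.no_grad():
    score = reloaded_model(**sample).logits.squeeze(-1).item()
print(f"Predicted complexity score for the sample prompt: {score:.2f}")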