NavyaNayer committed on
Commit d2be2aa · verified · 1 Parent(s): 20ccf8e

Delete intent_classifier.py

Files changed (1)
  1. intent_classifier.py +0 -102
intent_classifier.py DELETED
@@ -1,102 +0,0 @@
- import torch
- import torch.nn as nn
- import torch.optim as optim
- from torch.utils.data import Dataset, DataLoader
- from transformers import BertTokenizer, BertForSequenceClassification
- from datasets import load_dataset
- from tqdm import tqdm
- from sklearn.metrics import accuracy_score, precision_recall_fscore_support
-
- # Check for CUDA
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- print(device)
-
- # Load CLINC-OOS Dataset ("plus" config, which adds out-of-scope examples)
- dataset = load_dataset("clinc_oos", "plus")
-
- # Tokenizer
- tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-
- # Preprocess Dataset
- class IntentDataset(Dataset):
-     def __init__(self, dataset_split):
-         self.texts = dataset_split["text"]
-         # "intent" is a ClassLabel feature, so the labels are already integer
-         # class IDs. Re-mapping them per split (e.g. via set()) would assign
-         # train and test inconsistent IDs, so use the dataset's IDs directly.
-         self.labels = dataset_split["intent"]
-
-     def __len__(self):
-         return len(self.texts)
-
-     def __getitem__(self, idx):
-         inputs = tokenizer(self.texts[idx], padding="max_length", truncation=True, max_length=64, return_tensors="pt")
-         return {key: val.squeeze(0) for key, val in inputs.items()}, torch.tensor(self.labels[idx])
-
- # Create Dataloaders
- batch_size = 16
- train_dataset = IntentDataset(dataset["train"])
- test_dataset = IntentDataset(dataset["test"])
- train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
- test_loader = DataLoader(test_dataset, batch_size=batch_size)
-
- # Load Pretrained BERT Model
- # Take the class count from the ClassLabel metadata rather than from the data,
- # so it is correct even if a split happens to be missing some intents.
- num_labels = dataset["train"].features["intent"].num_classes
- model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels).to(device)
-
- # Loss & Optimizer
- criterion = nn.CrossEntropyLoss()
- optimizer = optim.AdamW(model.parameters(), lr=2e-5)
-
- # Training Loop
- num_epochs = 3
- for epoch in range(num_epochs):
-     model.train()
-     total_loss = 0
-     correct = 0
-     total = 0
-
-     for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} Training"):
-         inputs, labels = batch
-         inputs = {key: val.to(device) for key, val in inputs.items()}
-         labels = labels.to(device)
-
-         optimizer.zero_grad()
-         outputs = model(**inputs).logits
-         loss = criterion(outputs, labels)
-         loss.backward()
-         optimizer.step()
-
-         total_loss += loss.item()
-         correct += (outputs.argmax(dim=1) == labels).sum().item()
-         total += labels.size(0)
-
-     train_accuracy = correct / total
-     print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
-
- # Evaluation on Test Set
- model.eval()
- all_preds, all_labels = [], []
-
- with torch.no_grad():
-     for batch in tqdm(test_loader, desc="Testing"):
-         inputs, labels = batch
-         inputs = {key: val.to(device) for key, val in inputs.items()}
-         labels = labels.to(device)
-
-         outputs = model(**inputs).logits
-         preds = outputs.argmax(dim=1)
-
-         all_preds.extend(preds.cpu().numpy())
-         all_labels.extend(labels.cpu().numpy())
-
- # Compute Metrics
- accuracy = accuracy_score(all_labels, all_preds)
- precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average="weighted")
-
- print(f"Test Accuracy: {accuracy:.4f}")
- print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}")
-
- # Save Model
- torch.save(model.state_dict(), "intent_classifier.pth")
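
Since this commit removes the training script, below is a minimal sketch (not part of the deleted file) of how a checkpoint saved as intent_classifier.pth could still be loaded for inference. It assumes the 151-class label space of the clinc_oos "plus" config; predict_intent is a hypothetical helper name, not something defined in the repository.

# Minimal inference sketch, assuming a checkpoint produced by the script above
# with the clinc_oos "plus" label space (151 classes).
import torch
from transformers import BertTokenizer, BertForSequenceClassification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=151)
model.load_state_dict(torch.load("intent_classifier.pth", map_location=device))
model.to(device)
model.eval()

def predict_intent(text):
    # Tokenize the same way training did (max_length=64) and return the class ID.
    inputs = tokenizer(text, padding="max_length", truncation=True, max_length=64, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    return logits.argmax(dim=1).item()

print(predict_intent("how do i check my bank balance"))

The returned integer is a raw class ID; it can be mapped back to an intent name through the dataset's ClassLabel feature, e.g. dataset["train"].features["intent"].int2str(pred).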