dheeraj1019 committed on
Commit
d2a1ee9
·
verified ·
1 Parent(s): 94c90a7

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +121 -0
README.md ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: afl-3.0
3
+ datasets:
4
+ - HuggingFaceTB/cosmopedia
5
+ metrics:
6
+ - accuracy
7
+ library_name: adapter-transformers
8
+ pipeline_tag: text-classification
9
+ tags:
10
+ - code
11
+ ---
12
+ # Install the necessary libraries
13
+ !pip install transformers
14
+ !pip install torch
15
+
16
+ import torch
17
+ from transformers import RobertaTokenizer, RobertaForSequenceClassification, XLNetTokenizer, XLNetForSequenceClassification
18
+ from transformers import Trainer, TrainingArguments
19
+ from sklearn.model_selection import train_test_split
20
+ import numpy as np
21
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
22
+
23
# Placeholder data for text classification -- swap in your own dataset.
texts = [...]  # List of input texts
labels = [...]  # List of corresponding labels (0 or 1 for binary classification)

# Hold out 20% of the examples for testing; random_state pins the shuffle.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

# RoBERTa: tokenizer, classification model, and encoded train/test splits.
roberta_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
roberta_model = RobertaForSequenceClassification.from_pretrained("roberta-base")
train_encodings_roberta = roberta_tokenizer(
    train_texts,
    truncation=True,
    padding=True,
)
test_encodings_roberta = roberta_tokenizer(
    test_texts,
    truncation=True,
    padding=True,
)

# XLNet: tokenizer, classification model, and encoded train/test splits.
xlnet_tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
xlnet_model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased")
train_encodings_xlnet = xlnet_tokenizer(
    train_texts,
    truncation=True,
    padding=True,
)
test_encodings_xlnet = xlnet_tokenizer(
    test_texts,
    truncation=True,
    padding=True,
)
44
+
45
class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping from feature name (e.g. ``'input_ids'``) to a
            sequence indexable by example position.
        labels: Sequence of labels, one per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors with a ``'labels'`` entry."""
        # as_tensor converts plain Python lists exactly like torch.tensor, but
        # passes existing tensors through without the copy-construct warning
        # that torch.tensor() emits for tensor inputs.
        item = {
            key: torch.as_tensor(values[idx])
            for key, values in self.encodings.items()
        }
        item['labels'] = torch.as_tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)
57
+
58
# Wrap the encoded splits so Trainer can consume them as torch datasets.
train_dataset_roberta = MyDataset(train_encodings_roberta, train_labels)
test_dataset_roberta = MyDataset(test_encodings_roberta, test_labels)

train_dataset_xlnet = MyDataset(train_encodings_xlnet, train_labels)
test_dataset_xlnet = MyDataset(test_encodings_xlnet, test_labels)


def _build_training_args(run_name):
    """Return the TrainingArguments for one model's fine-tuning run.

    Args:
        run_name: Short identifier used to give the run its own
            checkpoint and logging directories.

    Returns:
        A TrainingArguments instance with the shared hyperparameters.
    """
    return TrainingArguments(
        # output_dir was missing from the original call; it is set explicitly
        # and per run so the two models do not write checkpoints to one place.
        output_dir=f'./results/{run_name}',
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        # Separate log directories keep the two runs' logs apart.
        logging_dir=f'./logs/{run_name}',
        logging_steps=10,
    )


# Fine-tune RoBERTa model
training_args = _build_training_args('roberta')

trainer_roberta = Trainer(
    model=roberta_model,
    args=training_args,
    train_dataset=train_dataset_roberta,
    eval_dataset=test_dataset_roberta,
)

trainer_roberta.train()

# Fine-tune XLNet model
training_args_xlnet = _build_training_args('xlnet')

trainer_xlnet = Trainer(
    model=xlnet_model,
    args=training_args_xlnet,
    train_dataset=train_dataset_xlnet,
    eval_dataset=test_dataset_xlnet,
)

trainer_xlnet.train()
91
+
92
+ # Evaluate models
93
def evaluate_model(model, test_dataset, batch_size=8):
    """Score a sequence-classification model on a labelled dataset.

    Args:
        model: Model whose forward pass returns an object with ``.logits``
            and which exposes ``.device``.
        test_dataset: Dataset whose items are dicts of tensors containing
            ``'input_ids'``, ``'attention_mask'``, ``'labels'`` and,
            optionally, ``'token_type_ids'``.
        batch_size: Number of examples per forward pass.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; precision, recall and
        F1 are computed with ``average='binary'``.
    """
    # Iterating the dataset directly yields single un-batched examples, so a
    # DataLoader is used to add the batch dimension the model expects.
    loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)
    input_keys = ('input_ids', 'attention_mask', 'token_type_ids')

    predictions = []
    labels = []

    # Switch to eval mode so dropout is disabled while scoring, then restore
    # whatever mode the caller left the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch in loader:
                labels.extend(batch['labels'].tolist())
                # Forward token_type_ids when the tokenizer produced them so
                # evaluation sees the same inputs the model was trained on.
                inputs = {
                    key: batch[key].to(model.device)
                    for key in input_keys
                    if key in batch
                }
                logits = model(**inputs).logits
                predictions.extend(torch.argmax(logits, dim=1).tolist())
    finally:
        model.train(was_training)

    accuracy = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='binary'
    )
    return accuracy, precision, recall, f1
107
+
108
# Score both fine-tuned models on their held-out splits before printing.
accuracy_roberta, precision_roberta, recall_roberta, f1_roberta = evaluate_model(
    roberta_model, test_dataset_roberta
)
accuracy_xlnet, precision_xlnet, recall_xlnet, f1_xlnet = evaluate_model(
    xlnet_model, test_dataset_xlnet
)

# One report per model: a heading followed by the four metrics.
for heading, accuracy, precision, recall, f1 in (
    (
        "RoBERTa Model Evaluation:",
        accuracy_roberta,
        precision_roberta,
        recall_roberta,
        f1_roberta,
    ),
    (
        "\nXLNet Model Evaluation:",
        accuracy_xlnet,
        precision_xlnet,
        recall_xlnet,
        f1_xlnet,
    ),
):
    print(heading)
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")