Jiayi05 committed on
Commit
b275ea2
·
verified ·
1 Parent(s): 4cfa7c5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +22 -13
README.md CHANGED
@@ -5,7 +5,7 @@ The following code loads and tests the models in a Colab notebook.
5
 
6
  ---
7
 
8
- ## Prerequisites
9
 
10
  1. Import the required Python packages:
11
 
@@ -28,10 +28,11 @@ from huggingface_hub import login
28
  login("Replace with the key")
29
  ```
30
 
31
- # Define the preprocessing and dataset class
32
 
33
- 1. Run the following preprocessing code
34
 
 
35
  class NewsDataset(Dataset):
36
  def __init__(self, texts, labels, tokenizer, max_len=128):
37
  self.texts = texts
@@ -75,33 +76,41 @@ def preprocess_text(text):
75
  text = text.lower()
76
  text = ' '.join(text.split())
77
  return text
 
78
 
79
 
80
-
81
- # Step 1: Load the model and tokenizer from Hugging Face Hub
82
  print("Loading model and tokenizer...")
83
- REPO_NAME = "CIS5190GoGo/CustomModel" # Replace with your repo name on Hugging Face Hub
84
  model = RobertaForSequenceClassification.from_pretrained(REPO_NAME)
85
  tokenizer = RobertaTokenizer.from_pretrained(REPO_NAME)
86
 
87
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
88
  model.to(device)
89
  print("Model and tokenizer loaded successfully!")
 
90
 
91
- # Step 2: Load test dataset
 
92
  print("Loading test data...")
93
  test_data_path = "/content/drive/MyDrive/5190_project/test_data_random_subset.csv" # Replace with your test set path
94
  test_data = pd.read_csv(test_data_path)
95
-
96
- # Preprocess test data
 
97
  X_test = test_data['title'].apply(preprocess_text).values
98
  y_test = test_data['labels'].values
 
99
 
100
- # Step 3: Prepare the dataset and dataloader
 
101
  test_dataset = NewsDataset(X_test, y_test, tokenizer)
102
  test_loader = DataLoader(test_dataset, batch_size=16, num_workers=2)
 
103
 
104
- # Step 4: Evaluate the model
 
105
  print("Evaluating the model...")
106
  model.eval()
107
  all_preds, all_labels = [], []
@@ -118,6 +127,6 @@ with torch.no_grad():
118
  all_preds.extend(preds.cpu().numpy())
119
  all_labels.extend(labels.cpu().numpy())
120
 
121
- # Step 5: Calculate accuracy
122
  accuracy = accuracy_score(all_labels, all_preds)
123
- print(f"Test Accuracy: {accuracy:.4f}")
 
 
5
 
6
  ---
7
 
8
+ # Step 1: Prerequisites
9
 
10
  1. Import the required Python packages:
11
 
 
28
  login("Replace with the key")
29
  ```
30
 
31
+ # Step 2: Define the preprocessing and dataset class
32
 
33
+ 1. Run the following class and function definitions, which are used to preprocess the test data:
34
 
35
+ ```python
36
  class NewsDataset(Dataset):
37
  def __init__(self, texts, labels, tokenizer, max_len=128):
38
  self.texts = texts
 
76
  text = text.lower()
77
  text = ' '.join(text.split())
78
  return text
79
+ ```
80
 
81
 
82
+ # Step 3: Load the model and tokenizer from Hugging Face Hub
83
+ ```python
84
  print("Loading model and tokenizer...")
85
+ REPO_NAME = "CIS5190GoGo/CustomModel" #This is where we pushed the model to
86
  model = RobertaForSequenceClassification.from_pretrained(REPO_NAME)
87
  tokenizer = RobertaTokenizer.from_pretrained(REPO_NAME)
88
 
89
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
90
  model.to(device)
91
  print("Model and tokenizer loaded successfully!")
92
+ ```
93
 
94
+ # Step 4: Load test dataset
95
+ ```python
96
  print("Loading test data...")
97
  test_data_path = "/content/drive/MyDrive/5190_project/test_data_random_subset.csv" # Replace with your test set path
98
  test_data = pd.read_csv(test_data_path)
99
+ ```
100
+ # Step 5: Preprocess test data
101
+ ```python
102
  X_test = test_data['title'].apply(preprocess_text).values
103
  y_test = test_data['labels'].values
104
+ ```
105
 
106
+ # Step 6: Prepare the dataset and dataloader
107
+ ```python
108
  test_dataset = NewsDataset(X_test, y_test, tokenizer)
109
  test_loader = DataLoader(test_dataset, batch_size=16, num_workers=2)
110
+ ```
111
 
112
+ # Step 7: Evaluate the model and calculate accuracy
113
+ ```python
114
  print("Evaluating the model...")
115
  model.eval()
116
  all_preds, all_labels = [], []
 
127
  all_preds.extend(preds.cpu().numpy())
128
  all_labels.extend(labels.cpu().numpy())
129
 
 
130
  accuracy = accuracy_score(all_labels, all_preds)
131
+ print(f"Test Accuracy: {accuracy:.4f}")
132
+ ```