Reyad-Ahmmed committed on
Commit
e2bac03
·
verified ·
1 Parent(s): 4d26001

Upload testSampleCode.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. testSampleCode.py +67 -0
testSampleCode.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Scratch script: load test.csv, encode its labels as integers, and split it.
#
# Reads a CSV with 'text' and 'label' columns from the working directory,
# replaces each label with its index in the sorted list of distinct labels,
# prints the intermediate results, and makes an 80/20 train/test split.
import pandas as pd
from sklearn.model_selection import train_test_split

dataFromCsv = pd.read_csv('test.csv')

# Snapshot the raw columns before 'label' is overwritten with integer ids below.
text = dataFromCsv['text'].tolist()
label = dataFromCsv['label'].tolist()

print(text)
print(label)

# Distinct labels in sorted order; a label's position here becomes its id.
sorted_labels = sorted(dataFromCsv['label'].unique())
print(sorted_labels)

# label name -> integer id. The loop variable is deliberately not called
# `label`, so it cannot be confused with the module-level `label` list.
label_to_id = {name: idx for idx, name in enumerate(sorted_labels)}
dataFromCsv['label'] = dataFromCsv['label'].map(label_to_id)

print("label list = ", label_to_id)
print("from csv file label = ", dataFromCsv['label'])

# Split the dataset: 20% held out for testing, fixed seed for repeatability.
train_df, test_df = train_test_split(dataFromCsv, test_size=0.2, random_state=42)
print("Training Set:")
print(train_df)
print("\nTesting Set:")
print(test_df)

# integer id -> label name, obtained by inverting label_to_id rather than
# re-typing the labels by hand, so the two mappings always agree with the CSV.
id_to_label = {idx: name for name, idx in label_to_id.items()}

# Look up the highest id instead of a literal 4 so the lookup stays valid
# whatever the number of distinct labels in the CSV.
print("After = ", id_to_label[len(id_to_label) - 1])
print("label items = ", id_to_label.items())

# Disabled example of a torch Dataset wrapper, kept for reference only.
# from torch.utils.data import Dataset
# import torch

# class IntentDataset(Dataset):
#     def __init__(self, encodings, labels):
#         self.encodings = encodings
#         self.labels = labels
#
#     def __getitem__(self, idx):
#         item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
#         item['labels'] = torch.tensor(self.labels[idx])
#         return item
#
#     def __len__(self):
#         return len(self.labels)

# # Sample data
# encodings = {
#     'input_ids': [[101, 102, 103], [104, 105, 106], [107, 108, 109]],
#     'attention_mask': [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
# }
# labels = [0, 1, 0]

# dataset = IntentDataset(encodings, labels)
# dataset_length = len(dataset)
# print(f"The dataset contains {dataset_length} items. {dataset.labels}")

# dataset_show = dataset[2]
# print(dataset_show)