# HF_Python / testSampleCode.py
# Author: Reyad-Ahmmed
# Upload testSampleCode.py with huggingface_hub
# Commit e2bac03 (verified)
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the samples; the script reads the 'text' and 'label' columns of test.csv.
dataFromCsv = pd.read_csv('test.csv')

# Plain Python lists of the raw column values, echoed for inspection.
text = dataFromCsv['text'].tolist()
label = dataFromCsv['label'].tolist()
print(text)
print(label)

# Distinct label values in ascending order.
sorted_lable = list(dataFromCsv['label'].unique())
sorted_lable.sort()
print(sorted_lable)

# Map every distinct label to its position in the sorted list, then replace
# the label column with those integer ids.
lableList = dict(zip(sorted_lable, range(len(sorted_lable))))
encoded_labels = dataFromCsv['label'].map(lableList)
dataFromCsv['label'] = encoded_labels
print("label list = ", lableList)
print("from csv file label = ", dataFromCsv['label'])

# Split the dataset: 20% held out for testing, fixed seed for repeatability.
splits = train_test_split(dataFromCsv, test_size=0.2, random_state=42)
train_df, test_df = splits
print("Training Set:")
print(train_df)
print("\nTesting Set:")
print(test_df)

# Rebind lableList to a fixed id -> label-name lookup. The names are
# hard-coded here rather than derived from the CSV.
# NOTE(review): presumably these mirror the labels found in test.csv - confirm.
lableList = dict(
    enumerate(["lastmonth", "nextweek", "sevendays", "today", "yesterday"])
)
print("After = ", lableList[4])
print("label items = ", lableList.items())
#test of dataset
# from torch.utils.data import Dataset
# import torch
# class IntentDataset(Dataset):
#     def __init__(self, encodings, labels):
#         self.encodings = encodings
#         self.labels = labels
#     def __getitem__(self, idx):
#         item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
#         label = self.labels[idx]
#         item['labels'] = torch.tensor(self.labels[idx])
#         return item
#     def __len__(self):
#         return len(self.labels)
# # Sample data
# encodings = {
# 'input_ids': [[101, 102, 103], [104, 105, 106], [107, 108, 109]],
# 'attention_mask': [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
# }
# labels = [0, 1, 0]
# dataset = IntentDataset(encodings, labels)
# dataset_length = len(dataset)
# print(f"The dataset contains {dataset_length} items. {dataset.labels}")
# dataset_show = dataset[2]
# print(dataset_show)