# Spaces: Runtime error
# Load an intent dataset from CSV, encode its labels as integer ids, and
# split it into training and testing sets. Intermediate results are printed
# for inspection.
import pandas as pd
from sklearn.model_selection import train_test_split

# The CSV is read with a 'text' column and a 'label' column.
dataFromCsv = pd.read_csv('test.csv')

# Plain-list copies of both columns, taken before the labels are encoded.
text = dataFromCsv['text'].tolist()
label = dataFromCsv['label'].tolist()
print(text)
print(label)

# Sorting the distinct labels makes the label -> id assignment deterministic.
sorted_lable = sorted(dataFromCsv['label'].unique())
print(sorted_lable)

# label -> id. The loop variables are deliberately not called `label`, so the
# comprehension cannot be mistaken for touching the `label` list above.
lableList = {name: idx for idx, name in enumerate(sorted_lable)}

# Replace each label in the frame with its integer id.
dataFromCsv['label'] = dataFromCsv['label'].map(lableList)
print("label list = ", lableList)
print("from csv file label = ", dataFromCsv['label'])

# Split the dataset: 20% held out for testing, fixed seed for reproducibility.
train_df, test_df = train_test_split(dataFromCsv, test_size=0.2, random_state=42)
print("Training Set:")
print(train_df)
print("\nTesting Set:")
print(test_df)

# Rebind lableList to the inverse mapping (id -> label). It is derived from
# the mapping computed above rather than hard-coded, so it always agrees with
# the labels actually present in the CSV.
lableList = {idx: name for name, idx in lableList.items()}

# .get() prints None instead of raising KeyError when the data has fewer than
# five distinct labels.
print("After = ", lableList.get(4))
print("label items = ", lableList.items())
# Test of dataset
# from torch.utils.data import Dataset
# import torch
# class IntentDataset(Dataset):
#     def __init__(self, encodings, labels):
#         self.encodings = encodings
#         self.labels = labels
#     def __getitem__(self, idx):
#         item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
#         label = self.labels[idx]
#         item['labels'] = torch.tensor(self.labels[idx])
#         return item
#     def __len__(self):
#         return len(self.labels)
# # Sample data
# encodings = {
#     'input_ids': [[101, 102, 103], [104, 105, 106], [107, 108, 109]],
#     'attention_mask': [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
# }
# labels = [0, 1, 0]
# dataset = IntentDataset(encodings, labels)
# dataset_length = len(dataset)
# print(f"The dataset contains {dataset_length} items. {dataset.labels}")
# dataset_show = dataset[2]
# print(dataset_show)