Commit: update

tasks/text.py (+4 -2)
@@ -51,6 +51,8 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Split dataset
     train_test = dataset["train"]
     test_dataset = dataset["test"]
+    print('dataset type: ' , test_dataset.column_names) # Debugging step
+    print('dataset type: ' , test_dataset['quote'][:5]) # Debugging step
 
     # Start tracking emissions
     tracker.start()
@@ -68,10 +70,10 @@ async def evaluate_text(request: TextEvaluationRequest):
 
     def tokenize_function(examples):
         return tokenizer(examples["quote"], padding=True, truncation=True, return_tensors='pt')
-
+    print('BEFORE TOKENIZING')
     # Tokenize the test dataset
     tokenized_test = test_dataset.map(tokenize_function, batched=True)
-
+    print('AFTER TOKENIZING')
     # Create DataLoader
     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
     dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False, collate_fn=data_collator)
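
For context, the committed code follows the usual datasets + transformers pattern: tokenize with map(), then let a padding collator batch the results for a DataLoader. The sketch below is a self-contained reconstruction of that pattern, not the Space's actual file: the checkpoint name and the two-row toy dataset are placeholders, and only the "quote" column, the debug prints, and the DataLoader settings come from the diff. Two details differ deliberately from the commit: return_tensors='pt' inside map(batched=True) has no lasting effect (map serializes its output back to Arrow lists anyway), and per-example padding=True is redundant once DataCollatorWithPadding pads each batch, so the sketch omits both. It also drops the raw string column before building the DataLoader, since the collator can only pad tensor-like fields.

    # Minimal sketch of the tokenize -> DataLoader pipeline the diff touches.
    # Assumptions: placeholder checkpoint and a toy dataset with a "quote" column.
    from datasets import Dataset
    from torch.utils.data import DataLoader
    from transformers import AutoTokenizer, DataCollatorWithPadding

    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")  # placeholder

    test_dataset = Dataset.from_dict({"quote": ["a first quote", "a second, longer quote"]})
    print('dataset type: ', test_dataset.column_names)  # debugging step from the diff
    print('dataset type: ', test_dataset['quote'][:5])  # debugging step from the diff

    def tokenize_function(examples):
        # No padding / return_tensors here: DataCollatorWithPadding pads each
        # batch dynamically, so per-example padding inside map() is redundant.
        return tokenizer(examples["quote"], truncation=True)

    tokenized_test = test_dataset.map(tokenize_function, batched=True)

    # The collator can only pad tensor-like fields (input_ids, attention_mask),
    # so remove the raw string column before handing the dataset to the loader.
    tokenized_test = tokenized_test.remove_columns(["quote"])

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False,
                            collate_fn=data_collator)

    for batch in dataloader:
        print(batch["input_ids"].shape)  # padded to the longest example in the batch
        break

If the "quote" column is left in place, DataCollatorWithPadding raises an error when it tries to pad the raw strings, which is one plausible reason for the BEFORE/AFTER TOKENIZING prints this commit adds.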