NaolTaye committed on
Commit
c8df9ce
·
1 Parent(s): 261ff27
Files changed (1) hide show
  1. tasks/text.py +4 -2
tasks/text.py CHANGED
@@ -51,6 +51,8 @@ async def evaluate_text(request: TextEvaluationRequest):
51
  # Split dataset
52
  train_test = dataset["train"]
53
  test_dataset = dataset["test"]
 
 
54
 
55
  # Start tracking emissions
56
  tracker.start()
@@ -68,10 +70,10 @@ async def evaluate_text(request: TextEvaluationRequest):
68
 
69
  def tokenize_function(examples):
70
  return tokenizer(examples["quote"], padding=True, truncation=True, return_tensors='pt')
71
-
72
  # Tokenize the test dataset
73
  tokenized_test = test_dataset.map(tokenize_function, batched=True)
74
-
75
  # Create DataLoader
76
  data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
77
  dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False, collate_fn=data_collator)
 
51
  # Split dataset
52
  train_test = dataset["train"]
53
  test_dataset = dataset["test"]
54
+ print('dataset type: ' , test_dataset.column_names) # Debugging step
55
+ print('dataset type: ' , test_dataset['quote'][:5]) # Debugging step
56
 
57
  # Start tracking emissions
58
  tracker.start()
 
70
 
71
  def tokenize_function(examples):
72
  return tokenizer(examples["quote"], padding=True, truncation=True, return_tensors='pt')
73
+ print('BEFORE TOKENIZING')
74
  # Tokenize the test dataset
75
  tokenized_test = test_dataset.map(tokenize_function, batched=True)
76
+ print('AFTER TOKENIZING')
77
  # Create DataLoader
78
  data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
79
  dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False, collate_fn=data_collator)