Faizal2805 committed on
Commit
0334947
·
verified ·
1 Parent(s): adfa174

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -99,7 +99,7 @@ custom_data = [
99
  dataset_custom = load_dataset("json", data_files={"train": custom_data})
100
 
101
  # Merge with OpenWebText dataset
102
- dataset = load_dataset("Skylion007/openwebtext", split="train[:10%]") # Load 5% to avoid streaming issues
103
 
104
  # Tokenization function
105
  def tokenize_function(examples):
 
99
  dataset_custom = load_dataset("json", data_files={"train": custom_data})
100
 
101
  # Merge with OpenWebText dataset
102
+ dataset = load_dataset("Skylion007/openwebtext", split="train[:20%]") # Load 20% to avoid streaming issues
103
 
104
  # Tokenization function
105
  def tokenize_function(examples):