Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,9 +16,15 @@ import spaces
|
|
16 |
# Use dotenv to load the environment variables
|
17 |
load_dotenv()
|
18 |
|
19 |
-
# Get
|
20 |
HF_TOKEN = os.getenv("HF_TOKEN_TEXT")
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
if not HF_TOKEN:
|
23 |
print("Warning: HF_TOKEN not found in environment variables. Submissions will not work.")
|
24 |
|
@@ -162,13 +168,13 @@ LABEL_MAPPING = {
|
|
162 |
|
163 |
# Load and prepare the dataset
|
164 |
print("Loading dataset...")
|
165 |
-
dataset = load_dataset(
|
166 |
|
167 |
# Convert string labels to integers
|
168 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
169 |
|
170 |
# Split dataset
|
171 |
-
train_test = dataset["train"].train_test_split(test_size=
|
172 |
train_dataset = train_test["train"]
|
173 |
test_dataset = train_test["test"]
|
174 |
|
|
|
16 |
# Use dotenv to load the environment variables
|
17 |
load_dotenv()
|
18 |
|
19 |
+
# Get environment variables
|
20 |
HF_TOKEN = os.getenv("HF_TOKEN_TEXT")
|
21 |
+
DATASET_NAME = os.getenv("DATASET_NAME", "QuotaClimat/frugalaichallenge-text-train") # Default to public dataset
|
22 |
+
TEST_SIZE = float(os.getenv("TEST_SIZE", "0.2")) # Default to 20% test size
|
23 |
+
TEST_SEED = int(os.getenv("TEST_SEED", "42")) # Default seed for reproducibility
|
24 |
+
|
25 |
+
print(f"Using dataset: {DATASET_NAME}")
|
26 |
+
print(f"Test split size: {TEST_SIZE}")
|
27 |
+
|
28 |
if not HF_TOKEN:
|
29 |
print("Warning: HF_TOKEN not found in environment variables. Submissions will not work.")
|
30 |
|
|
|
168 |
|
169 |
# Load and prepare the dataset
|
170 |
print("Loading dataset...")
|
171 |
+
dataset = load_dataset(DATASET_NAME)
|
172 |
|
173 |
# Convert string labels to integers
|
174 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
175 |
|
176 |
# Split dataset
|
177 |
+
train_test = dataset["train"].train_test_split(test_size=TEST_SIZE, seed=TEST_SEED)
|
178 |
train_dataset = train_test["train"]
|
179 |
test_dataset = train_test["test"]
|
180 |
|