# Hugging Face Spaces page header captured with this file (Space status: Sleeping).
import gradio as gr | |
from codecarbon import EmissionsTracker | |
from datasets import load_dataset | |
import numpy as np | |
from sklearn.metrics import accuracy_score | |
import random | |
import os | |
import json | |
from datetime import datetime | |
from huggingface_hub import HfApi | |
from huggingface_hub import upload_file | |
import tempfile | |
from dotenv import load_dotenv | |
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Hugging Face token used to upload submissions; read from HF_TOKEN_TEXT.
# This value is a secret: it must never be printed or logged.
HF_TOKEN = os.getenv("HF_TOKEN_TEXT")
if not HF_TOKEN:
    # Name the variable that is actually read so the message is actionable.
    print("Warning: HF_TOKEN_TEXT not found in environment variables. Submissions will not work.")

# Module-level emissions tracker shared by every call to evaluate().
tracker = EmissionsTracker(allow_multiple_runs=True)
# Function to get space username and URL | |
def get_space_info():
    """Return ``(username, space_url)`` for the Space running this app.

    The ``SPACE_ID`` environment variable is read; its text before the first
    ``/`` is used as the username and the whole value is appended to the
    Hugging Face Spaces base URL.

    Returns:
        A ``(username, space_url)`` tuple of strings. When ``SPACE_ID`` is
        unset or empty, the placeholders
        ``("local-user", "local-development")`` are returned.
    """
    space_name = os.getenv("SPACE_ID", "")
    if not space_name:
        return "local-user", "local-development"

    # str.split always yields at least one element, so this cannot fail and
    # needs no exception handler.
    username = space_name.split("/")[0]
    space_url = f"https://huggingface.co/spaces/{space_name}"
    return username, space_url
def clean_emissions_data(emissions_data):
    """Return the object's attributes as a dict, without unwanted fields.

    Args:
        emissions_data: Any object exposing its fields through ``__dict__``.

    Returns:
        A new dict of attribute name to value, omitting ``timestamp``,
        ``project_name``, ``experiment_id``, ``latitude`` and ``longitude``.
    """
    excluded = {"timestamp", "project_name", "experiment_id", "latitude", "longitude"}
    cleaned = {}
    for field_name, value in emissions_data.__dict__.items():
        if field_name in excluded:
            continue
        cleaned[field_name] = value
    return cleaned
def evaluate():
    """Score a random baseline on the test split while tracking emissions.

    Random labels in ``0..7`` are drawn for every test example, accuracy is
    computed against the true labels, and the tracker's "inference" task is
    used to measure the run.

    Returns:
        A four-element list: accuracy, emissions scaled by 1000, energy
        consumed scaled by 1000, and a JSON string (indent 2) holding the
        complete results record.
    """
    username, space_url = get_space_info()

    # Everything between start_task and stop_task is what gets measured.
    tracker.start()
    tracker.start_task("inference")

    true_labels = test_dataset["label"]
    n_samples = len(true_labels)
    predictions = [random.randint(0, 7) for _ in range(n_samples)]
    accuracy = accuracy_score(true_labels, predictions)

    emissions_data = tracker.stop_task()

    # Tracker values are multiplied by 1000 to match the units named in the
    # result keys (presumably kWh -> Wh and kg -> g; confirm against codecarbon).
    energy_wh = emissions_data.energy_consumed * 1000
    emissions_g = emissions_data.emissions * 1000

    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "accuracy": float(accuracy),
        "energy_consumed_wh": energy_wh,
        "emissions_gco2eq": emissions_g,
        "emissions_data": clean_emissions_data(emissions_data),
    }

    # Summary values first, then the detailed record as JSON text.
    detailed = json.dumps(results, indent=2)
    return [accuracy, emissions_g, energy_wh, detailed]
def submit_results(results_json):
    """Upload evaluation results to the public leaderboard dataset.

    Args:
        results_json: The results record, either as a dict or as a JSON
            string encoding one (presumably a dict when it comes from the
            ``gr.JSON`` component; verify against the Gradio version used).
            It must contain a ``"username"`` entry, which is used in the
            uploaded file name.

    Returns:
        The return value of the ``gr.Warning`` / ``gr.Info`` call that
        reports the outcome.

    Raises:
        Any exception raised while parsing the payload or uploading is
        propagated to the caller; the temporary file is removed regardless.
    """
    if not results_json:
        return gr.Warning("No results to submit")
    if not HF_TOKEN:
        return gr.Warning("HF_TOKEN not found. Please set up your Hugging Face token.")

    # Accept JSON text as well as an already-parsed record, so the payload is
    # never double-encoded and the username lookup below works.
    results = json.loads(results_json) if isinstance(results_json, str) else results_json
    results_str = json.dumps(results)

    # Build the destination path before creating any file so a payload
    # without "username" fails without leaving a temporary file behind.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    path_in_repo = f"submissions/{results['username']}_{timestamp}.json"

    # delete=False keeps the file on disk after it is closed so it can be
    # uploaded by path; it is removed explicitly in the finally clause.
    tmp_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json')
    try:
        with tmp_file:
            tmp_file.write(results_str)

        api = HfApi(token=HF_TOKEN)
        api.upload_file(
            path_or_fileobj=tmp_file.name,
            path_in_repo=path_in_repo,
            repo_id="frugal-ai-challenge/public-leaderboard-text",
            repo_type="dataset",
            token=HF_TOKEN,
        )
    finally:
        # Always clean up, including when the write or the upload fails.
        os.unlink(tmp_file.name)

    return gr.Info("Results submitted successfully to the leaderboard! 🎉")
# Single ordered table of (dataset label name, human-readable description).
# The position of each row is its integer class id.
_LABELS = (
    ("0_not_relevant", "No relevant claim detected"),
    ("1_not_happening", "Global warming is not happening"),
    ("2_not_human", "Not caused by humans"),
    ("3_not_bad", "Not bad or beneficial"),
    ("4_solutions_harmful_unnecessary", "Solutions harmful/unnecessary"),
    ("5_science_unreliable", "Science is unreliable"),
    ("6_proponents_biased", "Proponents are biased"),
    ("7_fossil_fuels_needed", "Fossil fuels are needed"),
)

# Dataset label name -> integer class id.
LABEL_MAPPING = {name: class_id for class_id, (name, _) in enumerate(_LABELS)}

# Integer class id -> description, for display purposes.
LABEL_DESCRIPTIONS = {class_id: text for class_id, (_, text) in enumerate(_LABELS)}
# Fetch the challenge data; this runs once, when the module is imported.
print("Loading dataset...")
dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train")

# Replace every string label with its integer class id.
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})

# Hold out 20% of the "train" split for evaluation; the fixed seed keeps the
# split identical from run to run.
train_test = dataset["train"].train_test_split(test_size=0.2, seed=42)
train_dataset, test_dataset = train_test["train"], train_test["test"]

# Print a short preview of the evaluation split.
print("\nFirst 5 rows of test set:")
for position, row in enumerate(test_dataset.select(range(5)), start=1):
    label_id = row["label"]
    preview = row["quote"][:100]
    print(f"\nExample {position}:")
    print(f"Text: {preview}...")
    print(f"Label: {label_id} - {LABEL_DESCRIPTIONS[label_id]}")
# Build the Gradio UI: a title followed by three tabs (instructions, model
# evaluation/submission, and the model card read from modelcard.md).
with gr.Blocks() as demo:
    gr.Markdown("""
# Frugal AI Challenge - Text task - Submission portal
## Climate Disinformation Classification
""")
    with gr.Tabs():
        with gr.Tab("Instructions"):
            gr.Markdown("""
To submit your results, please follow the steps below:
## Prepare your model submission
1. Clone the space of this portal on your own Hugging Face account.
2. Modify the ``evaluate`` function to replace the baseline by your model loading and inference within the inference pass where the energy consumption and emissions are tracked.
3. Eventually complete the requirements and/or any necessaries dependencies in your space.
4. Write down your model card in the ``modelcard.md`` file.
5. Deploy your space and verify that it works.
6. (Optional) You can change the Space hardware to use any GPU directly on Hugging Face.
## Submit your model to the leaderboard in the ``Model Submission`` tab
7. Step 1 - Evaluate model: Click on the button to evaluate your model. This will run you model, computes the accuracy on the test set (20% of the train set), and track the energy consumption and emissions.
8. Step 2 - Submit to leaderboard: Click on the button to submit your results to the leaderboard. This will upload the results to the leaderboard dataset and update the leaderboard.
9. You can see the leaderboard at https://huggingface.co/datasets/frugal-ai-challenge/public-leaderboard-text
""")
        with gr.Tab("Model Submission"):
            gr.Markdown("## Random Baseline Model")
            with gr.Row():
                with gr.Column(scale=1):
                    evaluate_btn = gr.Button("1. Evaluate model", variant="secondary")
                with gr.Column(scale=1):
                    submit_btn = gr.Button("2. Submit to leaderboard", variant="primary", size="lg")
            with gr.Row():
                accuracy_output = gr.Number(label="Accuracy", precision=4)
                emissions_output = gr.Number(label="Emissions (gCO2eq)", precision=12)
                energy_output = gr.Number(label="Energy Consumed (Wh)", precision=12)
            with gr.Row():
                results_json = gr.JSON(label="Detailed Results", visible=True)

            # Step 1: run evaluate() and fill the three numbers plus the
            # detailed JSON record.
            evaluate_btn.click(
                evaluate,
                inputs=None,
                outputs=[accuracy_output, emissions_output, energy_output, results_json]
            )
            # Step 2: send the detailed record shown in the JSON component
            # to submit_results().
            submit_btn.click(
                submit_results,
                inputs=[results_json],
                outputs=None  # No need for output component with popups
            )
        with gr.Tab("Model Card"):
            # Decode explicitly as UTF-8 so the card renders the same
            # regardless of the host's locale default encoding.
            with open("modelcard.md", "r", encoding="utf-8") as f:
                model_card_content = f.read()
            gr.Markdown(model_card_content)
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()