File size: 3,769 Bytes
f4e2c1f 4d6e8c2 f4e2c1f 4d6e8c2 f4e2c1f 4d6e8c2 f4e2c1f 4d6e8c2 98fe162 1c33274 70f5f26 f4e2c1f 4d6e8c2 70f5f26 f4e2c1f 70f5f26 4d6e8c2 f4e2c1f 4d6e8c2 f4e2c1f 4d6e8c2 70f5f26 f4e2c1f 70f5f26 f4e2c1f 4d6e8c2 64079b0 ff046db 64079b0 b299c15 64079b0 70f5f26 f4e2c1f 70f5f26 f4e2c1f 70f5f26 4d6e8c2 f4e2c1f 4d6e8c2 f4e2c1f 4d6e8c2 70f5f26 4d6e8c2 1c33274 4d6e8c2 f4e2c1f 4d6e8c2 f4e2c1f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import random
from datetime import datetime
from datasets import load_dataset
from fastapi import APIRouter
from sklearn.metrics import accuracy_score
from .utils.emissions import clean_emissions_data, get_space_info, tracker
from .utils.evaluation import TextEvaluationRequest
# Router on which the text-evaluation endpoint is registered.
router = APIRouter()
# Model description: used as the endpoint description and returned as
# "model_description" in the results.
DESCRIPTION = "RF overfitted like crazy"
# URL path of the endpoint, also returned as "api_route" in the results.
ROUTE = "/text"
@router.post(ROUTE, tags=["Text Task"], description=DESCRIPTION)
async def evaluate_text(request: TextEvaluationRequest):
    """
    Evaluate text classification for climate disinformation detection.

    Current Model: Random forest over sentence embeddings
    - Embeds each quote with the "all-MiniLM-L6-v2" SentenceTransformer
    - Classifies the embeddings with the random forest loaded from
      "models/random_forest_model.pkl" (label space 0-7)

    Args:
        request: Evaluation settings; ``dataset_name``, ``test_size`` and
            ``test_seed`` are read from it.

    Returns:
        A dict holding the accuracy on the held-out split, the energy and
        emissions figures recorded for the inference task, and the space and
        dataset configuration that produced them.

    Raises:
        KeyError: If a dataset row carries a label that is missing from the
            label mapping.
    """
    # Get space info
    username, space_url = get_space_info()

    # Define the label mapping
    LABEL_MAPPING = {
        "0_not_relevant": 0,
        "1_not_happening": 1,
        "2_not_human": 2,
        "3_not_bad": 3,
        "4_solutions_harmful_unnecessary": 4,
        "5_science_unreliable": 5,
        "6_proponents_biased": 6,
        "7_fossil_fuels_needed": 7,
    }

    # Load and prepare the dataset
    dataset = load_dataset(request.dataset_name)

    # Convert string labels to integers
    dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})

    # Split dataset
    train_test = dataset["train"].train_test_split(
        test_size=request.test_size, seed=request.test_seed
    )
    test_dataset = train_test["test"]

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")

    # The try/finally guarantees the task is closed even when inference fails,
    # so a failed request does not leave an open task on the shared tracker.
    try:
        # --------------------------------------------------------------------------------------------
        # MODEL INFERENCE STARTS HERE
        # Everything in this section is covered by the energy/emissions tracking.
        # --------------------------------------------------------------------------------------------

        # Heavy dependencies are imported inside the tracked section, so their
        # load cost is part of the measured task.
        import joblib
        import numpy as np
        from sentence_transformers import SentenceTransformer

        # Ground-truth labels, read before the dataset gains the embedding column
        true_labels = test_dataset["label"]

        embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

        def embed_quotes(batch):
            """Attach a "quote_embedding" column to a batch of examples."""
            # map(..., batched=True) passes a dict of columns, so batch["quote"]
            # is a list of quotes and is encoded in a single call.
            batch["quote_embedding"] = embedding_model.encode(batch["quote"]).tolist()
            return batch

        test_dataset = test_dataset.map(embed_quotes, batched=True)

        # Load the trained random forest from disk.
        # NOTE(review): joblib.load unpickles the file; it must come from a
        # trusted source.
        rf_loaded = joblib.load("models/random_forest_model.pkl")

        predictions = rf_loaded.predict(np.array(test_dataset["quote_embedding"]))

        # --------------------------------------------------------------------------------------------
        # MODEL INFERENCE STOPS HERE
        # --------------------------------------------------------------------------------------------
    finally:
        # Stop tracking emissions
        emissions_data = tracker.stop_task()

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predictions)

    # Prepare results dictionary
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "accuracy": float(accuracy),
        # Scaled by 1000 — presumably kWh -> Wh and kg -> g, going by the key
        # names; confirm against the tracker's units.
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed,
        },
    }

    return results
|