Spaces:

Navid-AI
/

The-Arabic-Rag-Leaderboard

Running on CPU Upgrade

App Files Files Community

The-Arabic-Rag-Leaderboard / utils.py

MohamedRashad

Normalize precision comparison to capitalize in model submission checks

bc40b2e about 2 months ago

raw

history blame

11.5 kB

	import gradio as gr
	import pandas as pd
	import json
	import os
	from pathlib import Path
	from huggingface_hub import HfApi, hf_hub_download

	# Shared Hugging Face Hub client used by the helpers in this module.
	api = HfApi()

	# Namespace used to build the ID of the requests dataset repository.
	OWNER = "Navid-AI"
	# Dataset repository that evaluation request files are uploaded to and read from.
	DATASET_REPO_ID = f"{OWNER}/requests-dataset"

	# Directory next to this file that holds the leaderboard result JSON files.
	results_dir = Path(__file__).parent / "results"

	# Cache the HF token to avoid multiple os.environ lookups.
	# The value is None when the HF_TOKEN environment variable is not set.
	HF_TOKEN = os.environ.get('HF_TOKEN', None)

	# Helper that loads a JSON results file into a DataFrame, with optional display formatting.
	def load_json_results(file_path: Path, prepare_for_display=False, sort_col=None, drop_cols=None):
	    """Load a results JSON file into a DataFrame, optionally formatted for display.

	    Args:
	        file_path: Location of the JSON file to read.
	        prepare_for_display: When true, replace every value of the ``Model``
	            column with an HTML link to that model's Hugging Face page.
	        sort_col: Column label(s) to sort by in descending order, or ``None``
	            to keep the order found in the file.
	        drop_cols: Column label(s) to remove, or ``None`` to keep every column.

	    Returns:
	        The loaded (and optionally transformed) ``pandas.DataFrame``.

	    Raises:
	        FileNotFoundError: If ``file_path`` does not exist.
	    """
	    if not file_path.exists():
	        raise FileNotFoundError(f"File '{file_path}' not found.")
	    df = pd.read_json(file_path)

	    if prepare_for_display:
	        # Series.map exists in every pandas release, whereas DataFrame.map
	        # was only introduced in pandas 2.1; the element-wise result is the same.
	        df["Model"] = df["Model"].map(
	            lambda name: f'<a href="https://huggingface.co/{name}" target="_blank">{name}</a>'
	        )
	    if drop_cols is not None:
	        df = df.drop(columns=drop_cols)
	    if sort_col is not None:
	        df = df.sort_values(sort_col, ascending=False)
	    return df

	def load_retrieval_results(prepare_for_display=False, sort_col=None, drop_cols=None):
	    """Load ``retrieval_results.json`` from the results directory.

	    Every argument is forwarded unchanged to ``load_json_results``, whose
	    return value is passed straight back to the caller.
	    """
	    forwarded = {
	        "prepare_for_display": prepare_for_display,
	        "sort_col": sort_col,
	        "drop_cols": drop_cols,
	    }
	    return load_json_results(results_dir / "retrieval_results.json", **forwarded)

	def load_reranking_results(prepare_for_display=False, sort_col=None, drop_cols=None):
	    """Load ``reranking_results.json`` from the results directory.

	    Every argument is forwarded unchanged to ``load_json_results``, whose
	    return value is passed straight back to the caller.
	    """
	    forwarded = {
	        "prepare_for_display": prepare_for_display,
	        "sort_col": sort_col,
	        "drop_cols": drop_cols,
	    }
	    return load_json_results(results_dir / "reranking_results.json", **forwarded)

	def get_model_info(model_id, verbose=False):
	    """Fetch summary metadata for a model through the shared Hub client.

	    Args:
	        model_id: Repository ID handed to ``api.model_info``.
	        verbose: When true, print a short summary of the fetched values.

	    Returns:
	        A tuple ``(num_downloads, num_likes, license, num_parameters,
	        supported_precisions)``. ``num_parameters`` is the safetensors
	        parameter total divided by one million and rounded;
	        ``supported_precisions`` lists the keys of the safetensors
	        per-precision parameter mapping. ``license`` is ``None`` when the
	        card metadata is absent or has no license entry.

	    Raises:
	        ValueError: If the model info carries no safetensors metadata, so
	            neither the parameter count nor the precisions can be derived.
	    """
	    model_info = api.model_info(model_id)
	    num_downloads = model_info.downloads
	    num_likes = model_info.likes

	    # card_data may be None (subscripting it raises TypeError) or may lack
	    # the key; in both cases report "no license" instead of failing.
	    try:
	        model_license = model_info.card_data["license"]
	    except (KeyError, TypeError):
	        model_license = None

	    safetensors = model_info.safetensors
	    if safetensors is None:
	        raise ValueError(
	            f"Model '{model_id}' has no safetensors metadata; "
	            "parameter count and precisions are unavailable."
	        )
	    num_parameters = round(safetensors.total / 1e6)
	    supported_precisions = list(safetensors.parameters.keys())

	    if verbose:
	        print(f"Model '{model_id}' has {num_downloads} downloads, {num_likes} likes, and is licensed under {model_license}.")
	        # The count was divided by 1e6 above, so the unit is millions.
	        print(f"The model has approximately {num_parameters} million parameters.")
	        print(f"The model supports the following precisions: {supported_precisions}")
	    return num_downloads, num_likes, model_license, num_parameters, supported_precisions

	def fetch_model_information(model_name):
	    """Gradio handler that fills the submission form from Hub metadata.

	    Args:
	        model_name: Model repository ID typed by the user.

	    Returns:
	        A 5-tuple matching the handler's outputs: an update for the
	        precision dropdown (choices plus the first choice selected), the
	        license, the parameter count, the download count and the like count.

	    Raises:
	        gr.Error: If the model information cannot be fetched.
	    """
	    try:
	        num_downloads, num_likes, model_license, num_parameters, supported_precisions = get_model_info(model_name)
	    except Exception as e:
	        # gr.Error is an exception class: it must be raised, not merely
	        # constructed, for the message to be reported to the user.
	        raise gr.Error(f"Error: Could not fetch model information. {str(e)}") from e

	    if not supported_precisions:
	        # 'Missing' is the label submit_model interprets as "no precision".
	        supported_precisions = ["Missing"]
	    return gr.update(choices=supported_precisions, value=supported_precisions[0]), model_license, num_parameters, num_downloads, num_likes

	def _submission_exists(frame, columns, model_name, revision, precision, task):
	    """Return True when *frame* holds a row matching the submission key.

	    ``columns`` names the model, revision, precision and task columns of
	    *frame*, in that order. Precision is compared case-insensitively; a
	    ``None`` precision matches null cells and the literal 'Missing' label.
	    """
	    if frame.empty:
	        return False
	    model_col, revision_col, precision_col, task_col = columns
	    stored_precision = frame[precision_col]
	    normalized = stored_precision.astype(str).str.strip().str.lower()
	    if precision is None:
	        precision_match = stored_precision.isna() | (normalized == "missing")
	    else:
	        precision_match = normalized == precision
	    matches = (
	        (frame[model_col] == model_name)
	        & (frame[revision_col] == revision)
	        & precision_match
	        & (frame[task_col] == task)
	    )
	    return bool(matches.any())


	def submit_model(model_name, revision, precision, params, license, task):
	    """Queue a model for evaluation unless it is already known.

	    The submission is rejected when the task is unsupported, the model name
	    is not of the form ``owner/model``, the same model/revision/precision/task
	    combination already appears in the results, the pending requests or the
	    finished requests, or the model cannot be found on the Hub. Otherwise a
	    JSON request file is uploaded under ``pending/`` in the dataset repository.

	    Args:
	        model_name: Full model repository ID, e.g. ``owner/model``.
	        revision: Model revision recorded in the request.
	        precision: Precision label; ``None``, an empty string or 'Missing'
	            mean that no precision is recorded.
	        params: Parameter count, stored in the request as given.
	        license: License label, stored in the request as given.
	        task: Either 'Retriever' or 'Reranker'.

	    Returns:
	        A human-readable status message describing the outcome.
	    """
	    if task not in ("Retriever", "Reranker"):
	        return "Task is not supported 🤷‍♂️"

	    # Validate the name before any file or network access so that a
	    # malformed ID fails fast with the relevant message.
	    model_name = model_name.strip()
	    org_model = model_name.split('/')
	    if len(org_model) != 2 or not all(org_model):
	        return "Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct' 🤷‍♂️"
	    org, model_id = org_model

	    # Normalize the precision: lowercase text, or None when it is missing.
	    if precision is None or precision == 'Missing':
	        precision = None
	    else:
	        precision = precision.strip().lower() or None

	    # Load existing evaluations
	    if task == "Retriever":
	        results = load_retrieval_results()
	    else:
	        results = load_reranking_results()

	    submission_key = (model_name, revision, precision, task)
	    request_columns = ('model_name', 'revision', 'precision', 'task')

	    # Check if model is already evaluated
	    if _submission_exists(results, ('Model', 'Revision', 'Precision', 'Task'), *submission_key):
	        return f"Model {model_name} has already been evaluated as a {task} 🎉"

	    # Check if model is in pending requests (loaded only when needed)
	    if _submission_exists(load_requests('pending'), request_columns, *submission_key):
	        return f"Model {model_name} is already in the evaluation queue as a {task} 🚀"

	    # Check if model is in finished requests
	    if _submission_exists(load_requests('finished'), request_columns, *submission_key):
	        return f"Model {model_name} has already been evaluated as a {task} 🎉"

	    # Check if model exists on HuggingFace Hub
	    try:
	        api.model_info(model_name)
	    except Exception as e:
	        print(f"Error fetching model info: {e}")
	        return f"Model {model_name} not found on HuggingFace Hub 🤷‍♂️"

	    # Prepare the submission data
	    submission = {
	        "model_name": model_name,
	        "license": license,
	        "revision": revision,
	        "precision": precision,
	        "status": "PENDING",
	        "params": params,
	        "task": task
	    }
	    submission_json = json.dumps(submission, indent=2)

	    # Define the file path in the repository
	    precision_str = precision if precision else 'Missing'
	    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}_{task.lower()}.json"

	    # Upload the submission to the dataset repository
	    try:
	        api.upload_file(
	            path_or_fileobj=submission_json.encode('utf-8'),
	            path_in_repo=file_path_in_repo,
	            repo_id=DATASET_REPO_ID,
	            repo_type="dataset",
	            token=HF_TOKEN
	        )
	    except Exception as e:
	        print(f"Error uploading file: {e}")
	        return f"Error: Could not submit model '{model_name}' for evaluation."

	    return f"Model {model_name} has been submitted successfully as a {task} 🚀"

	def load_requests(status_folder, task_type=None):
	    """Load evaluation request files from one folder of the dataset repository.

	    Args:
	        status_folder: Folder to read: 'pending', 'finished', or 'failed'.
	        task_type: When given, keep only requests whose ``task`` field
	            equals this value.

	    Returns:
	        A ``pandas.DataFrame`` with one row per request file that could be
	        downloaded and parsed. It is empty when the repository cannot be
	        listed or when nothing matches. Files that fail to load are skipped
	        after printing the error.
	    """
	    try:
	        # Use the cached token
	        repo_files = api.list_repo_files(
	            repo_id=DATASET_REPO_ID,
	            repo_type="dataset",
	            token=HF_TOKEN
	        )
	    except Exception as e:
	        print(f"Error accessing dataset repository: {e}")
	        return pd.DataFrame()  # Return empty DataFrame if repository not found or inaccessible

	    # Filter files in the desired folder
	    folder_prefix = f"{status_folder}/"
	    request_paths = [
	        path for path in repo_files
	        if path.startswith(folder_prefix) and path.endswith('.json')
	    ]

	    requests_data = []
	    for file_path in request_paths:
	        try:
	            local_file_path = hf_hub_download(
	                repo_id=DATASET_REPO_ID,
	                filename=file_path,
	                repo_type="dataset",
	                token=HF_TOKEN
	            )
	            # Request files are uploaded as UTF-8, so decode them the same
	            # way instead of relying on the platform default encoding.
	            with open(local_file_path, 'r', encoding='utf-8') as f:
	                requests_data.append(json.load(f))
	        except Exception as e:
	            print(f"Error loading file {file_path}: {e}")
	            continue  # Skip files that can't be loaded

	    df = pd.DataFrame(requests_data)

	    # Filter by task type
	    if task_type and not df.empty:
	        if 'task' not in df.columns:
	            # No loaded request declares a task, so none can match.
	            return df.iloc[0:0]
	        df = df[df['task'] == task_type]

	    return df


	def submit_gradio_module(task_type):
	    """Build the model-submission form and the evaluation-status tables.

	    Creates the input widgets, wires the auto-fetch and submit handlers,
	    then loads the pending, finished and failed requests for ``task_type``
	    and shows each group in its own collapsed accordion.

	    Args:
	        task_type: Task name stored in a ``gr.State``, passed to
	            ``submit_model`` on submission and used to filter the requests.
	    """
	    task_state = gr.State(value=task_type)

	    with gr.Row(equal_height=True):
	        name_box = gr.Textbox(
	            label="Model",
	            placeholder="Enter the full model name from HuggingFace Hub (e.g., intfloat/multilingual-e5-large-instruct)",
	            scale=4,
	        )
	        fetch_btn = gr.Button(value="Auto Fetch Model Info", variant="secondary")

	    with gr.Row():
	        precision_dropdown = gr.Dropdown(
	            choices=["F16", "F32", "BF16", "I8", "U8", "I16"],
	            label="Precision",
	            value="F16"
	        )
	        license_box = gr.Textbox(
	            label="License",
	            placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
	            value="Open"
	        )
	        revision_box = gr.Textbox(label="Revision", placeholder="main", value="main")

	    with gr.Row():
	        params_box = gr.Textbox(label="Params (in Millions)", interactive=False)
	        downloads_box = gr.Textbox(label="Number of Downloads", interactive=False)
	        likes_box = gr.Textbox(label="Number of Likes", interactive=False)

	    submit_btn = gr.Button("Submit Model", variant="primary")
	    result_box = gr.Textbox(label="Submission Result", interactive=False)

	    # Both the button click and pressing Enter in the name box trigger the fetch.
	    fetch_targets = [precision_dropdown, license_box, params_box, downloads_box, likes_box]
	    for register in (fetch_btn.click, name_box.submit):
	        register(
	            fetch_model_information,
	            inputs=[name_box],
	            outputs=fetch_targets
	        )
	    submit_btn.click(
	        submit_model,
	        inputs=[name_box, revision_box, precision_dropdown, params_box, license_box, task_state],
	        outputs=result_box
	    )

	    # Load pending, finished, and failed requests before rendering anything.
	    status_sections = [
	        ("Pending Evaluations", "No pending evaluations.", load_requests('pending', task_type)),
	        ("Finished Evaluations", "No finished evaluations.", load_requests('finished', task_type)),
	        ("Failed Evaluations", "No failed evaluations.", load_requests('failed', task_type)),
	    ]

	    # Display the tables
	    gr.Markdown("## Evaluation Status")
	    for heading, empty_note, frame in status_sections:
	        with gr.Accordion(f"{heading} ({len(frame)})", open=False):
	            if frame.empty:
	                gr.Markdown(empty_note)
	            else:
	                gr.Dataframe(frame)