Spaces:

SimulaMet
/

MedVQA

Running

App Files Files Community

MedVQA / medvqa /competitions /gi-2025 /task_2.py

SushantGautam

Replace "validation" with "test" in challenge evaluation code

947c62b about 2 months ago

raw

history blame contribute delete

8.18 kB

	from gradio_client import Client, handle_file
	from huggingface_hub import snapshot_download, login, whoami
	import argparse
	import os
	import subprocess as sp
	import time
	from datetime import datetime, timezone
	import shutil # Add this import
	import json
	from huggingface_hub import HfApi, grant_access
	import re

	# Hugging Face usernames that are granted access to the gated submission
	# repository when a submission is made.
	HF_GATE_ACESSLIST = ["SushantGautam",
	                     "stevenah", "vlbthambawita"]

	# Uploading to the submission server is enabled only when the environment
	# flag is exactly the string 'TRUE'.
	MEDVQA_SUBMIT = os.environ.get('_MEDVQA_SUBMIT_FLAG_', 'FALSE') == 'TRUE'
	parser = argparse.ArgumentParser(
	    description='Run GI-2025 Task 2 (Image Generation)')
	parser.add_argument('--repo_id', type=str, required=True,
	                    help='Path to the HF submission repository')
	# parse_known_args() tolerates extra command-line arguments instead of
	# aborting on them; the unknown ones are discarded.
	args, _ = parser.parse_known_args()

	os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
	# Script that must be present in the submission repository, and the JSON
	# file that is read back from the snapshot directory after it has run.
	submission_file = "submission_task2.py"
	file_from_validation = "predictions_2.json"

	# Packages installed with pip before the submission script is executed.
	min_library = ["datasets>=3.4.1", "transformers", "evaluate", "scipy", "scikit-learn", "diffusers", "peft",
	               "rouge_score", 'tqdm', "gradio_client>=1.8.0"]

	# Banner: challenge name, subtask and the repository being analysed.
	print("🌟 ImageCLEFmed-MEDVQA-GI-2025 🌟",
	      "https://github.com/simula/ImageCLEFmed-MEDVQA-GI-2025")
	print("🔍 Subtask 2: Creation of High-Fidelity Synthetic GI Images")
	print(f"👀 Analyzing submission repository: {args.repo_id} 👀")

	# Any failure while querying the current user is treated as "not logged
	# in" and answered with a login() call.
	try:
	    logged_in_as = whoami()['name']
	    print(f"Logged in to HuggingFace as: {logged_in_as}")
	except Exception:
	    print("⚠️⚠️ Not logged in to HuggingFace! Please get your login token from https://huggingface.co/settings/tokens 🌐")
	    login()

	# Connect to the submission server and call its "/refresh_page" endpoint
	# as a connectivity check; the reply is echoed to the console.
	client = Client("SimulaMet/medvqa")
	print("💓 Communicating with the Submission Server: Ping!")
	result = client.predict(api_name="/refresh_page")
	print(result)


	# Identity of the submitter and the time of this run (seconds since epoch);
	# both are used later to name the uploaded submission file.
	hf_username = whoami()['name']
	assert len(hf_username) > 0, "🚫 HuggingFace login failed for some reason"
	current_timestamp = int(time.time())

	# Download only the entry script and the optional requirements file.
	wanted_files = [submission_file, "requirements.txt"]
	snap_dir = snapshot_download(
	    repo_id=args.repo_id, allow_patterns=wanted_files)

	entry_script_path = os.path.join(snap_dir, submission_file)
	if not os.path.isfile(entry_script_path):
	    raise FileNotFoundError(
	        f"Submission file '{submission_file}' not found in the repository!")

	# Remove a predictions file left in the snapshot directory so that the
	# file read later comes from this run.
	stale_output_path = os.path.join(snap_dir, file_from_validation)
	if os.path.isfile(stale_output_path):
	    os.remove(stale_output_path)

	print("📦 Making sure of the minimum requirements to run the script 📦")
	pip_install = ["python", "-m", "pip", "install", "-q"]
	sp.run(pip_install + min_library, check=True)

	# Install the submission's own requirements when the repo ships them.
	requirements_path = os.path.join(snap_dir, "requirements.txt")
	if os.path.isfile(requirements_path):
	    print(
	        f"📦 Installing requirements from the submission repo: {args.repo_id}/requirements.txt")
	    sp.run(pip_install + ["-r", f"{snap_dir}/requirements.txt"],
	           cwd=snap_dir, check=True)

	def _patch_for_challenge(code):
	    """Rewrite submission source text for evaluation on the private dataset.

	    The public dataset id is replaced by the private one, the quoted split
	    name "validation" by "test", lines that load the public mapping files
	    are commented out, and replacement loaders are inserted after the first
	    line containing 'import requests'.

	    Returns the patched source, or None when ``code`` does not mention
	    "SimulaMet/Kvasir-VQA-test" and therefore cannot be patched.
	    """
	    if "SimulaMet/Kvasir-VQA-test" not in code:
	        return None
	    code = code.replace("SimulaMet/Kvasir-VQA-test",
	                        "SimulaMet/Kvasir-VQA-private")
	    code = code.replace('"validation"', '"test"')

	    lines = code.splitlines()
	    # Comment out lines that fetch from huggingface.co/datasets or assign
	    # prompt_to_real / jsons__. `\s*` lets the assignment sit at any
	    # indentation (including none) and with any spacing before '='.
	    for i, line in enumerate(lines):
	        stripped = line.lstrip()
	        if stripped.startswith("#"):
	            continue
	        if ("huggingface.co/datasets" in line or
	                re.search(r'^\s*prompt_to_real\s*=', line) or
	                re.search(r'^\s*jsons__\s*=', line)):
	            leading_ws = len(line) - len(stripped)
	            lines[i] = line[:leading_ws] + "# " + line[leading_ws:]

	    # NOTE(review): the inserted lines call `json`; this assumes the
	    # submission script already imports it -- confirm against the template.
	    new_block = [
	        'from huggingface_hub import hf_hub_download',
	        'prompt_to_real = json.load(open(hf_hub_download("SimulaMet/Kvasir-VQA-private", "real_mapping", repo_type="dataset")))',
	        'jsons__ = json.load(open(hf_hub_download("SimulaMet/Kvasir-VQA-private", "imagen-test", repo_type="dataset")))',
	    ]
	    insert_idx = next(
	        (i + 1 for i, line in enumerate(lines) if "import requests" in line),
	        None)
	    if insert_idx is None:
	        # Without the anchor the replacement loaders cannot be placed; say
	        # so instead of silently producing a script without them.
	        print("⚠️ 'import requests' not found in submission file; private data loaders were not inserted.")
	    else:
	        lines[insert_idx:insert_idx] = new_block
	    return "\n".join(lines)


	if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
	    # Patch submission file for challenge evaluation: a "<name>_challenge.py"
	    # copy is written next to the original and run in its place.
	    challenge_file = submission_file.replace(".py", "_challenge.py")
	    submission_path = os.path.join(snap_dir, submission_file)
	    challenge_path = os.path.join(snap_dir, challenge_file)
	    with open(submission_path, "r", encoding="utf-8") as f:
	        code = f.read()
	    patched_code = _patch_for_challenge(code)
	    if patched_code is None:
	        print(
	            "⚠️ Challenge patch not applied: expected string not found in submission file.")
	        # SystemExit with a message prints it to stderr and exits with
	        # status 1 (the `os` module has no `exit` function).
	        raise SystemExit(
	            "Please check the submission file for compatibility with challenge evaluation.")
	    with open(challenge_path, "w", encoding="utf-8") as f:
	        f.write(patched_code)
	    submission_file = challenge_file
	    print(f"🔄 Challenge file created at: {challenge_path}")

	# Execute the (possibly patched) submission script from inside the snapshot
	# directory; check=True aborts this script if it exits with a non-zero status.
	print("🔍 Starting your script and loading submission details...")
	script_to_run = f"{snap_dir}/{submission_file}"
	sp.run(["python", script_to_run], cwd=snap_dir, check=True)
	print(
	    f"🎉 The submission script ran successfully, the intermediate files are at {snap_dir}")

	if not MEDVQA_SUBMIT:
	    # Validation-only run: nothing is uploaded.
	    print("\n You can now run medvqa validate_and_submit .... command to submit the task.")
	else:
	    print("🚀 Preparing for submission 🚀")
	    # The upload is a copy of the predictions file named
	    # "<user>-_-_-<timestamp>-_-_-task2.json".
	    file_path_to_upload = os.path.join(
	        snap_dir, f"{hf_username}-_-_-{current_timestamp}-_-_-task2.json")
	    shutil.copy(os.path.join(snap_dir, file_from_validation),
	                file_path_to_upload)
	    # Add repo_id to the submission file.
	    with open(file_path_to_upload, 'r', encoding='utf-8') as f:
	        data = json.load(f)
	    data['repo_id'] = args.repo_id
	    with open(file_path_to_upload, 'w', encoding='utf-8') as f:
	        json.dump(data, f, ensure_ascii=False)

	    api = HfApi()
	    api.update_repo_visibility(args.repo_id, private=False)  # Make public
	    api.update_repo_settings(
	        args.repo_id, gated='manual')  # Enable gated access
	    # Granting access is best-effort: a failure for one user is reported
	    # and the remaining users are still processed.
	    for user in HF_GATE_ACESSLIST:
	        try:
	            grant_access(args.repo_id, user)
	        except Exception as e:
	            print(user, ":", e)
	    print(
	        f'''✅ {args.repo_id} model is now made public, but gated, and is shared with organizers.
	You should not make the model private or remove/update it until the competition results are announced.
	Feel free to re-submit the task if you change the model on the repository.
	We will notify you if there are any issues with the submission.
	''')

	    # Upload the prepared JSON to the submission server.
	    result = client.predict(
	        file=handle_file(file_path_to_upload),
	        api_name="/add_submission"
	    )
	    submitted_at = datetime.fromtimestamp(current_timestamp, tz=timezone.utc)
	    print({"User": hf_username, "Task": "task2",
	           "Submitted_time": str(submitted_at) + " UTC"
	           })
	    print(result)
	    print("Visit this URL to see the entry: 👇")
	    # NOTE(review): the client object is discarded; presumably it is
	    # constructed only because doing so prints the Space URL -- confirm.
	    Client("SimulaMet/medvqa")

	if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
	    # Challenge mode: write a cleaned copy of the predictions file into the
	    # current working directory, named after the submitting team.
	    src_json = os.path.join(snap_dir, file_from_validation)
	    if os.path.isfile(src_json):
	        with open(src_json, "r", encoding="utf-8") as f:
	            data = json.load(f)
	        # Remove 'debug' key if present
	        data.pop("debug", None)
	        # Rename 'public_scores' to 'challenge_scores' if present
	        if "public_scores" in data:
	            data["challenge_scores"] = data.pop("public_scores")
	        # Get Team_Name from submission_info
	        team_name = data.get("submission_info", {}).get(
	            "Team_Name", "unknown_team")
	        # Keep only letters, digits, '_' and '-' so the name is safe in a
	        # file name; str() guards re.sub against a non-string Team_Name.
	        team_name_safe = re.sub(r'[^a-zA-Z0-9_\-]', '_', str(team_name))
	        out_json = os.path.join(os.getcwd(), f"task2_{team_name_safe}.json")
	        with open(out_json, "w", encoding="utf-8") as f:
	            json.dump(data, f, ensure_ascii=False, indent=2)
	        print(f"✅ Copied and processed predictions to: {out_json}")
	    else:
	        print(f"❌ {file_from_validation} not found in snapshot directory!")
	# === End: Post-processing predictions_2.json ===