from gradio_client import Client, handle_file
from huggingface_hub import HfApi, grant_access, login, snapshot_download, whoami
import argparse
import os
import subprocess as sp
import sys
import time
from datetime import datetime, timezone
import shutil
import json
import re

HF_GATE_ACESSLIST = ["SushantGautam",
                     "stevenah", "vlbthambawita"]

MEDVQA_SUBMIT = os.environ.get('_MEDVQA_SUBMIT_FLAG_', 'FALSE') == 'TRUE'
parser = argparse.ArgumentParser(
    description='Run ImageCLEFmed-MEDVQA-GI-2025 Task 1 (VQA)')
parser.add_argument('--repo_id', type=str, required=True,
                    help='ID of the HF submission repository')
args, _ = parser.parse_known_args()

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
submission_file = "submission_task1.py"
file_from_validation = "predictions_1.json"

min_library = ["datasets>=3.4.1", "transformers", "evaluate",
               "rouge_score", 'tqdm', "gradio_client>=1.8.0"]

print("🌟 ImageCLEFmed-MEDVQA-GI-2025 🌟",
      "https://github.com/simula/ImageCLEFmed-MEDVQA-GI-2025")
print("πŸ” Subtask 1: Algorithm Development for Question Interpretation and Response")
print(f"πŸ‘€ Analyzing submission repository: {args.repo_id} πŸ‘€")

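# Make sure the user is authenticated with the Hugging Face Hub; prompt for a token otherwise.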
try:
    print(f"Logged in to HuggingFace as: {whoami()['name']}")
except Exception:
    print("⚠️⚠️ Not logged in to HuggingFace! Please get your login token from https://huggingface.co/settings/tokens 🌐")
    login()

client = Client("SimulaMet/medvqa")
print("πŸ’“ Communicating with the Submission Server: Ping!")
result = client.predict(
    api_name="/refresh_page"
)
print(result)


hf_username = whoami()['name']
assert len(hf_username) > 0, "🚫 HuggingFace login failed for some reason"
current_timestamp = int(time.time())

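# Download only the submission script and optional requirements.txt from the submission repo.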
snap_dir = snapshot_download(
    repo_id=args.repo_id, allow_patterns=[submission_file, "requirements.txt"])

if not os.path.isfile(os.path.join(snap_dir, submission_file)):
    raise FileNotFoundError(
        f"Submission file '{submission_file}' not found in the repository!")

if os.path.isfile(os.path.join(snap_dir, file_from_validation)):
    os.remove(os.path.join(snap_dir, file_from_validation))

print("πŸ“¦ Making sure of the minimum requirements to run the script πŸ“¦")
sp.run(["python", "-m", "pip", "install", "-q"] + min_library, check=True)

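# Install any extra dependencies shipped with the submission repo itself.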
if os.path.isfile(os.path.join(snap_dir, "requirements.txt")):
    print(
        f"📦 Installing requirements from the submission repo: {args.repo_id}/requirements.txt")
    sp.run(["python", "-m", "pip", "install", "-q", "-r",
            f"{snap_dir}/requirements.txt"], cwd=snap_dir, check=True)


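# Challenge evaluation: rewrite the script to load the private test split instead of the public validation split.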
if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
    # Patch submission file for challenge evaluation
    challenge_file = submission_file.replace(".py", "_challenge.py")
    submission_path = os.path.join(snap_dir, submission_file)
    challenge_path = os.path.join(snap_dir, challenge_file)
    with open(submission_path, "r", encoding="utf-8") as f:
        code = f.read()
    # Use regex to match the line, ignoring whitespace
    pattern = r'val_dataset\s*=\s*load_dataset\(\s*["\']SimulaMet/Kvasir-VQA-test["\']\s*,\s*split\s*=\s*["\']validation["\']\s*\)'
    new_line = 'val_dataset = load_dataset("SimulaMet/Kvasir-VQA-private", split="test")'
    if re.search(pattern, code):
        code = re.sub(pattern, new_line, code)
        with open(challenge_path, "w", encoding="utf-8") as f:
            f.write(code)
        submission_file = challenge_file
        print(f"πŸ”„ Challenge file created at: {challenge_path}")
    else:
        print("⚠️ Challenge patch not applied: expected line not found in submission file.")
        sys.exit(
            "Please check the submission file for compatibility with challenge evaluation.")


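# Run the (possibly patched) submission script from inside the snapshot directory.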
sp.run(["python", f"{snap_dir}/{submission_file}"],
       cwd=snap_dir, check=True)
print(
    f"🎉 The submission script ran successfully; the intermediate files are at {snap_dir}")

if not MEDVQA_SUBMIT:
    print("\n You can now run medvqa validate_and_submit .... command to submit the task.")
else:
    print("πŸš€ Preparing for submission πŸš€")
    file_path_to_upload = os.path.join(
        snap_dir, f"{hf_username}-_-_-{current_timestamp}-_-_-task1.json")
    shutil.copy(os.path.join(snap_dir, file_from_validation),
                file_path_to_upload)  # copy predictions to a uniquely named submission file
    # add repo_id to the submission file
    with open(file_path_to_upload, 'r', encoding='utf-8') as f:
        data = json.load(f)
    data['repo_id'] = args.repo_id
    with open(file_path_to_upload, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)
    api = HfApi()
    api.update_repo_visibility(args.repo_id, private=False)  # Make public
    api.update_repo_settings(
        args.repo_id, gated='manual')  # Enable gated access
    for user in HF_GATE_ACCESSLIST:
        try:
            grant_access(args.repo_id, user)  # Grant access
        except Exception as e:
            print(user, ":", e)
    print(
        f'''✅ {args.repo_id} model is now public but gated, and has been shared with the organizers.
        You should not make the model private or remove/update it until the competition results are announced.
        Feel free to re-submit the task if you change the model on the repository.
        We will notify you if there are any issues with the submission.
        ''')

    result = client.predict(
        file=handle_file(file_path_to_upload),
        api_name="/add_submission"
    )
    print({"User": hf_username, "Task": "task1",
           "Submitted_time": str(datetime.fromtimestamp(int(current_timestamp), tz=timezone.utc)) + " UTC"
           })
    print(result)
    print("Visit this URL to see the entry: πŸ‘‡")
    Client("SimulaMet/medvqa")


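# === Begin: Post-processing predictions_1.json (challenge evaluation only) ===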
if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
    src_json = os.path.join(snap_dir, "predictions_1.json")
    if os.path.isfile(src_json):
        with open(src_json, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Remove 'debug' key if present
        data.pop("debug", None)
        # Rename 'public_scores' to 'challenge_scores' if present
        if "public_scores" in data:
            data["challenge_scores"] = data.pop("public_scores")
        # Get Team_Name from submission_info
        team_name = data.get("submission_info", {}).get(
            "Team_Name", "unknown_team")
        team_name_safe = re.sub(r'[^a-zA-Z0-9_\-]', '_', team_name)
        out_json = os.path.join(os.getcwd(), f"task1_{team_name_safe}.json")
        with open(out_json, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"βœ… Copied and processed predictions to: {out_json}")
    else:
        print("❌ predictions_1.json not found in snapshot directory!")
    # === End: Post-processing predictions_1.json ===