# HF Space commit 7b5eba5 ("chore: top 20 auths, cap score", author: n0w0f)
import datetime
import os
import re
import gradio as gr
import pandas as pd
import yaml
# Constants
EVAL_CARDS_DIR = "eval_cards"
TEMPLATE_PATH = "template.yaml"

# Ensure the eval cards directory exists
os.makedirs(EVAL_CARDS_DIR, exist_ok=True)

# Copy the bundled template into place. Read the source file FIRST: the
# original opened template.yaml for writing before reading, so a missing
# yaml_template.yaml would truncate an existing template.yaml and leave it
# empty.
with open("yaml_template.yaml", "r") as template_file:
    _template_content = template_file.read()
with open(TEMPLATE_PATH, "w") as f:
    f.write(_template_content)
def load_template():
    """Return the raw text of the YAML template file at TEMPLATE_PATH."""
    with open(TEMPLATE_PATH, "r") as template_file:
        contents = template_file.read()
    return contents
def yaml_to_dict(yaml_str):
    """Parse a YAML string; on parse failure return {"error": message}."""
    try:
        parsed = yaml.safe_load(yaml_str)
    except yaml.YAMLError as exc:
        return {"error": str(exc)}
    return parsed
def compute_coverage_score(eval_data):
    """
    Score how completely an eval card fills out the expected sections.

    Each section carries a fixed weight; a section's contribution is its
    weight scaled by the fraction of leaf fields that are non-empty.
    Returns (total_score, breakdown) where total_score is capped at 100
    and breakdown maps section name -> per-section statistics.
    """
    section_weights = {
        "metadata": 5,
        "evaluation_design": 10,
        "estimand": 20,
        "estimator": 20,
        "estimate": 20,
        "results_communication": 10,
        "known_issues_and_limitations": 10,
        "version_and_maintenance": 5,
        "citation_and_usage": 5,
    }

    def _leaf_is_filled(value):
        # A scalar field counts as filled when truthy and not a string
        # that is blank or a literal empty-container placeholder.
        if not value:
            return False
        return not (isinstance(value, str) and value.strip() in ("", "[]", "{}"))

    def _tally(node):
        """Return (filled, total) leaf-field counts under *node*."""
        if isinstance(node, dict):
            filled = total = 0
            for child in node.values():
                if isinstance(child, (dict, list)):
                    sub_filled, sub_total = _tally(child)
                    filled += sub_filled
                    total += sub_total
                else:
                    total += 1
                    if _leaf_is_filled(child):
                        filled += 1
            return filled, total
        if isinstance(node, list):
            # An empty list counts as one unfilled field.
            if not node:
                return 0, 1
            counts = [_tally(item) for item in node]
            return sum(c[0] for c in counts), sum(c[1] for c in counts)
        # Bare scalar at the section root: truthiness alone decides.
        return (1, 1) if node else (0, 1)

    breakdown = {}
    total_score = 0
    for name, weight in section_weights.items():
        if name not in eval_data:
            breakdown[name] = {
                "score": 0,
                "max_score": weight,
                "completion_rate": 0,
                "fields_filled": 0,
                "fields_total": 0,
            }
            continue
        filled, total = _tally(eval_data[name])
        rate = filled / total if total > 0 else 0
        entry = {
            "score": round(rate * weight, 2),
            "max_score": weight,
            "completion_rate": round(rate * 100, 2),
            "fields_filled": filled,
            "fields_total": total,
        }
        breakdown[name] = entry
        total_score += entry["score"]

    return min(round(total_score, 2), 100), breakdown
def get_llm_feedback(yaml_content, api_token=None):
    """
    Get feedback on the eval card from Groq's LLM.

    Uses GROQ_API_KEY from environment variables if no token is provided.
    Returns the model's feedback text, or a human-readable error string on
    any failure (missing token, HTTP error, network error).
    """
    import os

    import requests
    from dotenv import load_dotenv

    # Load environment variables from a .env file if one exists.
    load_dotenv()

    # Use the provided token or fall back to the environment.
    api_token = api_token or os.environ.get("GROQ_API_KEY")
    if not api_token:
        return "API token is required for LLM feedback. Please set the GROQ_API_KEY environment variable or provide a token."

    try:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_token}",
        }
        prompt = f"""
I'm reviewing an Evaluation Card in YAML format. Please analyze it for completeness,
consistency, and clarity. Provide specific recommendations for improvement.
Focus on:
1. Sections that need more detail
2. Inconsistencies or contradictions
3. Clarity of language and explanations
4. Alignment with best practices for ML evaluation
Here's the YAML content:
```yaml
{yaml_content}
```
Provide your feedback in a structured format with specific, actionable recommendations.
"""
        payload = {
            "model": "llama-3.3-70b-versatile",  # or another groq supported model
            "messages": [{"role": "user", "content": prompt}],
        }
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            # BUG FIX: without a timeout, a stalled connection hangs the UI forever.
            timeout=60,
        )
        if response.status_code == 200:
            return response.json()["choices"][0]["message"]["content"]
        return f"Error getting Groq LLM feedback: {response.status_code} - {response.text}"
    except Exception as e:
        return f"Error getting Groq LLM feedback: {str(e)}"
def save_eval_card(yaml_content, paper_url="", repo_url=""):
    """
    Save an eval card to EVAL_CARDS_DIR with optional link metadata.

    If paper_url/repo_url are given, they are injected into the card's
    metadata section (created if absent) and the YAML is re-serialized.
    Returns (status_message, file_path); file_path is None on failure.
    """
    try:
        eval_data = yaml.safe_load(yaml_content)

        # Attach the optional links; setdefault guards cards that have no
        # metadata section yet (the original raised KeyError here).
        metadata = eval_data.setdefault("metadata", {})
        if paper_url:
            metadata["paper_link"] = paper_url
        if repo_url:
            metadata["repository_link"] = repo_url

        # Re-serialize so the saved file includes any injected metadata.
        yaml_content = yaml.dump(eval_data)

        # Build a filesystem-safe, timestamped filename from the card title.
        # BUG FIX: the sanitized title was previously discarded by a
        # placeholder filename ("(unknown)_<timestamp>.yaml").
        safe_title = re.sub(r"[^\w\-_]", "_", eval_data.get("title", "Unnamed"))
        filename = f"{safe_title}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.yaml"
        file_path = os.path.join(EVAL_CARDS_DIR, filename)

        with open(file_path, "w") as file:
            file.write(yaml_content)
        return f"Evaluation card saved successfully as {filename}", file_path
    except Exception as e:
        return f"Error saving evaluation card: {str(e)}", None
def load_all_eval_cards():
    """
    Load every saved eval card from EVAL_CARDS_DIR.

    Returns a list of dicts containing display metadata, the coverage score
    with its per-section breakdown, plus the raw YAML and parsed data.
    Cards that fail to load or parse are skipped with an error printed.
    """
    eval_cards = []
    for filename in os.listdir(EVAL_CARDS_DIR):
        if not filename.endswith(".yaml"):
            continue
        file_path = os.path.join(EVAL_CARDS_DIR, filename)
        try:
            with open(file_path, "r") as file:
                yaml_content = file.read()
            eval_data = yaml.safe_load(yaml_content)

            # Coverage score (compute_coverage_score already caps at 100).
            score, score_details = compute_coverage_score(eval_data)

            # Extract key metadata for the gallery views.
            metadata = eval_data.get("metadata", {})
            eval_cards.append(
                {
                    "filename": filename,
                    "title": eval_data.get("title", "Unnamed Evaluation"),
                    "summary": eval_data.get("summary", ""),
                    "authors": ", ".join(metadata.get("authors", [])),
                    "creation_date": metadata.get("creation_date", ""),
                    "coverage_score": score,
                    "score_details": score_details,
                    "yaml_content": yaml_content,
                    "data": eval_data,
                }
            )
        except Exception as e:
            # BUG FIX: the message previously printed a "(unknown)"
            # placeholder instead of naming the failing file.
            print(f"Error loading {filename}: {str(e)}")
    return eval_cards
def format_eval_card_as_html(eval_card):
    """
    Render one eval card dict (as built by load_all_eval_cards) as an HTML
    detail panel with title, links, score, and a per-section coverage table.

    BUG FIX: metadata is looked up defensively — the original indexed
    eval_card["data"]["metadata"] directly and raised KeyError for cards
    whose YAML has no metadata section.
    """
    metadata = (eval_card.get("data") or {}).get("metadata") or {}
    repo_link = metadata.get("repository_link", "")
    paper_link = metadata.get("paper_link", "")

    # Optional link rows, rendered only when the link is present.
    repo_html = (
        f'<p><strong>Repository:</strong> <a href="{repo_link}" target="_blank">{repo_link}</a></p>'
        if repo_link
        else ""
    )
    paper_html = (
        f'<p><strong>Paper:</strong> <a href="{paper_link}" target="_blank">{paper_link}</a></p>'
        if paper_link
        else ""
    )

    html = f"""
    <div style="border: 1px solid #ddd; padding: 15px; margin-bottom: 20px; border-radius: 5px;">
        <h3>{eval_card["title"]}</h3>
        <p>{eval_card["summary"]}</p>
        <p><strong>Authors:</strong> {eval_card["authors"]}</p>
        <p><strong>Created:</strong> {eval_card["creation_date"]}</p>
        {repo_html}
        {paper_html}
        <p><strong>Coverage Score:</strong> {eval_card["coverage_score"]}%</p>
        <h4>Coverage by Section:</h4>
        <table style="width: 100%; border-collapse: collapse;">
            <tr>
                <th style="text-align: left; padding: 5px; border-bottom: 1px solid #ddd;">Section</th>
                <th style="text-align: right; padding: 5px; border-bottom: 1px solid #ddd;">Score</th>
                <th style="text-align: right; padding: 5px; border-bottom: 1px solid #ddd;">Completion</th>
            </tr>
    """
    for section, details in eval_card["score_details"].items():
        html += f"""
            <tr>
                <td style="padding: 5px; border-bottom: 1px solid #eee;">{section}</td>
                <td style="text-align: right; padding: 5px; border-bottom: 1px solid #eee;">{details["score"]}/{details["max_score"]}</td>
                <td style="text-align: right; padding: 5px; border-bottom: 1px solid #eee;">{details["completion_rate"]}%</td>
            </tr>
        """
    html += """
        </table>
        <div style="margin-top: 15px;">
            <!-- Additional actions can go here -->
        </div>
    </div>
    """
    return html
def create_eval_cards_table(eval_cards):
    """Render all eval cards as stacked HTML panels, highest coverage first.

    Note: sorts *eval_cards* in place; callers rely on the resulting order.
    """
    if not eval_cards:
        return "<p>No evaluation cards found.</p>"
    eval_cards.sort(key=lambda card: card["coverage_score"], reverse=True)
    return "".join(format_eval_card_as_html(card) for card in eval_cards)
def upload_file(file):
    """Decode an uploaded YAML file and return (text, parsed_data).

    Returns an error message and None if nothing was uploaded or the
    content cannot be decoded/parsed.
    """
    if file is None:
        return "No file uploaded", None
    try:
        text = file.decode("utf-8")
        parsed = yaml.safe_load(text)  # validates the YAML as a side effect
    except Exception as exc:
        return f"Error processing file: {str(exc)}", None
    return text, parsed
def get_feedback(yaml_content):
    """Fetch LLM feedback for the given eval-card YAML text.

    Returns an instructional message when no content or no API token
    (GROQ_API_KEY) is available.
    """
    if not yaml_content:
        return "Please upload or paste a YAML file first."
    api_token = os.environ.get("GROQ_API_KEY")
    if api_token:
        return get_llm_feedback(yaml_content, api_token)
    return (
        "Please provide an API token or set the GROQ_API_KEY environment variable."
    )
def submit_eval_card(yaml_content, paper_url="", repo_url=""):
    """Validate, score, and persist an eval card.

    Returns (message, coverage_score, score_details); the last two are
    None on any failure.
    """
    if not yaml_content:
        return "Please upload or paste a YAML file first.", None, None
    try:
        parsed = yaml.safe_load(yaml_content)  # raises on malformed YAML
        score, score_details = compute_coverage_score(parsed)
        result, file_path = save_eval_card(yaml_content, paper_url, repo_url)
    except Exception as exc:
        return f"Error processing evaluation card: {str(exc)}", None, None
    if not file_path:
        return f"Error saving evaluation card: {result}", None, None
    return (
        f"Evaluation card saved successfully! Coverage score: {score}%",
        score,
        score_details,
    )
def refresh_gallery():
    """
    Rebuild both gallery views: the HTML card list and the table DataFrame.

    The table's Authors column is restricted to the 20 most frequent
    authors across all cards. Returns (html, dataframe_or_None).

    BUG FIX: the original wrapped the author counting and row building in a
    stray outer `for card in eval_cards:` loop that shadowed its own loop
    variable and appended the table rows once per card (N-fold duplicates).
    """
    eval_cards = load_all_eval_cards()
    html = create_eval_cards_table(eval_cards)

    # Count how often each author appears across all cards.
    author_counts = {}
    for card in eval_cards:
        for author in card["authors"].split(", "):
            author_counts[author] = author_counts.get(author, 0) + 1

    # Keep only the 20 most frequent authors for the table view.
    top_authors = {
        author
        for author, _count in sorted(
            author_counts.items(), key=lambda item: item[1], reverse=True
        )[:20]
    }

    # Build one table row per card (order matches the HTML view, since
    # create_eval_cards_table sorts eval_cards in place).
    table_data = []
    for card in eval_cards:
        shown_authors = [
            author for author in card["authors"].split(", ") if author in top_authors
        ]
        table_data.append(
            {
                "Title": card["title"],
                "Authors": ", ".join(shown_authors),
                "Creation Date": card["creation_date"],
                "Coverage Score": f"{card['coverage_score']}%",
            }
        )

    df = pd.DataFrame(table_data)
    return html, df if not df.empty else None
def handle_upload_tab(file_obj, yaml_text):
    """Prefer the uploaded file's YAML over pasted text for the editor pane."""
    if file_obj is None:
        return yaml_text
    content, _parsed = upload_file(file_obj)
    return content
# Create the Gradio interface.
# Layout: a header row, then two top-level tabs — "Upload & Review" (file
# upload / paste, LLM feedback, submission) and "Gallery" (card + table views).
with gr.Blocks(title="Evaluation Cards Gallery") as app:
    # --- Header ---
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("# Evaluation Cards for Machine Learning in Materials Science. ")
            gr.Markdown("""
            Upload your evaluation card in YAML format, get feedback from an LLM, and submit it to the gallery.
            checkout the [GitHub repository](https://github.com/lamalab-org/eval-cards) for more information.
            """)
    with gr.Tabs():
        # --- Upload & Review tab: left column = inputs, right column = output ---
        with gr.TabItem("Upload & Review"):
            with gr.Row():
                with gr.Column():
                    file_upload = gr.File(
                        label="Upload YAML File", file_types=[".yaml", ".yml"]
                    )
                    with gr.Accordion("Or paste YAML content", open=False):
                        yaml_input = gr.TextArea(
                            label="YAML Content",
                            placeholder="Paste your YAML content here...",
                            lines=10,
                        )
                    paper_url_input = gr.Textbox(
                        label="Paper URL (Optional)",
                        placeholder="https://arxiv.org/abs/...",
                    )
                    repo_url_input = gr.Textbox(
                        label="Repository URL (Optional)",
                        placeholder="https://github.com/...",
                    )
                    load_template_btn = gr.Button("Load Template")
                    # api_token = gr.Textbox(label="API Token (for LLM feedback)", type="password")
                    with gr.Row():
                        get_feedback_btn = gr.Button("Get LLM Feedback")
                        submit_btn = gr.Button(
                            "Submit Evaluation Card", variant="primary"
                        )
                with gr.Column():
                    # Single editable pane showing whichever YAML source is active.
                    yaml_display = gr.TextArea(label="Current YAML", lines=20)
                    with gr.Accordion("LLM Feedback", open=True):
                        feedback_display = gr.Markdown()
                    with gr.Accordion("Submission Result", open=True):
                        result_display = gr.Markdown()
                        # Hidden components that capture submit_eval_card's
                        # extra return values (score and breakdown).
                        coverage_score = gr.Number(
                            label="Coverage Score", visible=False
                        )
                        coverage_details = gr.JSON(
                            label="Coverage Details", visible=False
                        )
        # --- Gallery tab: rendered cards and a sortable summary table ---
        with gr.TabItem("Gallery"):
            refresh_btn = gr.Button("Refresh Gallery")
            with gr.Tabs():
                with gr.TabItem("Card View"):
                    gallery_html = gr.HTML()
                with gr.TabItem("Table View"):
                    gallery_table = gr.DataFrame()

    # Set up event handlers.
    load_template_btn.click(fn=load_template, outputs=[yaml_display])
    # Either input source (file upload or pasted text) mirrors into yaml_display.
    file_upload.change(
        fn=handle_upload_tab, inputs=[file_upload, yaml_input], outputs=[yaml_display]
    )
    yaml_input.change(fn=lambda x: x, inputs=[yaml_input], outputs=[yaml_display])
    get_feedback_btn.click(
        fn=get_feedback, inputs=[yaml_display], outputs=[feedback_display]
    )
    submit_btn.click(
        fn=submit_eval_card,
        inputs=[yaml_display, paper_url_input, repo_url_input],
        outputs=[result_display, coverage_score, coverage_details],
    )
    refresh_btn.click(fn=refresh_gallery, outputs=[gallery_html, gallery_table])

    # Initialize the gallery on app start.
    app.load(fn=refresh_gallery, outputs=[gallery_html, gallery_table])

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    app.launch()