import difflib
import html
import json
import os

import requests
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Base URL of the Firebase store used to cache model structures; None when the
# FIREBASE_URL environment variable is not set.
FIREBASE_URL = os.environ.get("FIREBASE_URL")
def fetch_from_firebase(model_id, timeout=10):
    """Return the cached structure for *model_id*, or ``None`` on a cache miss.

    The cache is best-effort: a missing ``FIREBASE_URL``, a network failure,
    a non-200 response, or an undecodable body are all treated as a miss so
    the caller can fall back to loading the model itself.

    Args:
        model_id: Identifier used as the key under ``model_structures``.
        timeout: Seconds to wait for Firebase before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body of a 200 response, otherwise ``None``.
    """
    if not FIREBASE_URL:
        # No cache configured; avoid requesting a URL that starts with "None".
        return None
    try:
        response = requests.get(
            f"{FIREBASE_URL}/model_structures/{model_id}.json",
            timeout=timeout,
        )
    except requests.RequestException:
        # Cache unreachable: behave like a miss instead of crashing the app.
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # Body was not valid JSON (requests' JSON errors subclass ValueError).
        return None
def save_to_firebase(model_id, structure, timeout=10):
    """Store *structure* in the Firebase cache under *model_id*.

    Args:
        model_id: Identifier used as the key under ``model_structures``.
        structure: JSON-serializable mapping to store.
        timeout: Seconds to wait for Firebase before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        ``True`` if Firebase answered with HTTP 200, ``False`` otherwise
        (including when no ``FIREBASE_URL`` is configured or the request
        itself fails).

    NOTE(review): if this targets the Firebase Realtime Database REST API,
    keys containing "." are presumably rejected, and the structure keys
    produced elsewhere in this file look like dotted parameter names --
    confirm that writes actually succeed.
    """
    if not FIREBASE_URL:
        # No cache configured; nothing to write to.
        return False
    try:
        response = requests.put(
            f"{FIREBASE_URL}/model_structures/{model_id}.json",
            data=json.dumps(structure),
            timeout=timeout,
        )
    except requests.RequestException:
        # Caching is best-effort: report failure rather than raise.
        return False
    return response.status_code == 200
def get_model_structure(model_id):
    """Return a ``{parameter name: shape string}`` mapping for *model_id*.

    A cached copy from Firebase is used when one exists and is non-empty.
    Otherwise the model is loaded on CPU in bfloat16, the shape of every
    ``state_dict`` entry is recorded as ``str(tensor.shape)``, and the
    result is written back to the cache before being returned.
    """
    cached = fetch_from_firebase(model_id)
    if cached:
        return cached

    loaded_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="cpu",
    )

    shapes = {}
    for param_name, tensor in loaded_model.state_dict().items():
        shapes[param_name] = str(tensor.shape)

    # The save result is ignored: a failed cache write does not affect the answer.
    save_to_firebase(model_id, shapes)
    return shapes
def compare_structures(struct1, struct2):
    """Diff two structure mappings line by line.

    Each mapping is rendered as ``"key: value"`` lines in its own iteration
    order, and the two line lists are handed to ``difflib.ndiff``.

    Returns:
        The ``ndiff`` generator; it can be consumed only once.
    """
    def as_lines(structure):
        # One "name: shape" line per entry.
        return [f"{name}: {shape}" for name, shape in structure.items()]

    return difflib.ndiff(as_lines(struct1), as_lines(struct2))
def display_diff(diff):
    """Split ``ndiff``-style lines into two row-aligned HTML columns.

    Lines starting with ``'- '`` go to the left column, lines starting with
    ``'+ '`` go to the right column, and lines starting with a space go to
    both. The opposite column receives an empty row so both sides keep the
    same number of rows. Any other line (such as ndiff's ``'? '`` hints) is
    skipped.

    Args:
        diff: Iterable of diff lines, e.g. the result of ``difflib.ndiff``.

    Returns:
        ``(left_html, right_html, diff_found)``: the two columns joined with
        ``<br>``, and a flag that is ``True`` when at least one ``'- '`` or
        ``'+ '`` line was seen.
    """
    left_lines = []
    right_lines = []
    diff_found = False
    for line in diff:
        # The text is later rendered as raw HTML, so escape markup characters.
        text = html.escape(line[2:])
        if line.startswith('- '):
            left_lines.append(text)
            right_lines.append('')
            diff_found = True
        elif line.startswith('+ '):
            right_lines.append(text)
            left_lines.append('')
            diff_found = True
        elif line.startswith(' '):
            left_lines.append(text)
            right_lines.append(text)
        # Anything else carries no content for either column.
    # Join with an explicit HTML line break so each entry stays on its own row.
    left_html = "<br>".join(left_lines)
    right_html = "<br>".join(right_lines)
    return left_html, right_html, diff_found
# Use the wide page layout.
st.set_page_config(layout="wide")

# Hook for custom CSS intended to widen the layout.
# NOTE(review): the injected body is only a newline, so no styling is applied
# here -- confirm whether a <style> block is missing.
st.markdown(body="\n", unsafe_allow_html=True)

st.title(body="Model Structure Comparison Tool")

# The two model IDs to compare, followed by the button that starts the comparison.
model_id1 = st.text_input(label="Enter the first HuggingFace Model ID")
model_id2 = st.text_input(label="Enter the second HuggingFace Model ID")
compare_button_clicked = st.button(label="Compare Models", key="compare_button")
def _render_comparison(first_id, second_id):
    """Render the structure diff and tokenizer vocab sizes for two model IDs."""
    # Report load failures inline, in the same way as the tokenizer check below.
    try:
        struct1 = get_model_structure(first_id)
        struct2 = get_model_structure(second_id)
    except Exception as e:
        st.error(f"Error loading models: {e}")
        return

    diff = compare_structures(struct1, struct2)
    left_html, right_html, diff_found = display_diff(diff)
    st.write("### Comparison Result")
    if not diff_found:
        st.success("The model structures are identical.")
    col1, col2 = st.columns([1.5, 1.5])  # Two equal-width columns
    with col1:
        st.write("### Model 1")
        st.markdown(left_html, unsafe_allow_html=True)
    with col2:
        st.write("### Model 2")
        st.markdown(right_html, unsafe_allow_html=True)

    # Tokenizer verification
    try:
        tokenizer1 = AutoTokenizer.from_pretrained(first_id)
        tokenizer2 = AutoTokenizer.from_pretrained(second_id)
        st.write(f"**{first_id} Tokenizer Vocab Size**: {tokenizer1.vocab_size}")
        st.write(f"**{second_id} Tokenizer Vocab Size**: {tokenizer2.vocab_size}")
    except Exception as e:
        st.error(f"Error loading tokenizers: {e}")


# Session flag: True while a requested comparison still has to run.
if "compare_button_disabled" not in st.session_state:
    st.session_state["compare_button_disabled"] = False
if compare_button_clicked:
    st.session_state["compare_button_disabled"] = True

if st.session_state["compare_button_disabled"]:
    try:
        with st.spinner('Comparing models and loading tokenizers...'):
            if model_id1 and model_id2:
                _render_comparison(model_id1, model_id2)
            else:
                st.error("Please enter both model IDs.")
    finally:
        # Always clear the flag, even on failure, so the next rerun does not
        # repeat the comparison without a new click.
        st.session_state["compare_button_disabled"] = False
else:
    # This widget needs its own key: "compare_button_disabled" is written
    # through st.session_state above, which Streamlit does not allow for a
    # button's key.
    # NOTE(review): this disabled duplicate of the main button looks
    # redundant -- confirm whether it is still wanted.
    st.button("Compare Models", key="compare_button_placeholder", disabled=True)