"""Streamlit front end for the URLGuardian URL classifier."""

import re

import streamlit as st
from transformers import (
    pipeline,
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# Matches "http://" and "https://" in any letter case, anywhere in the string.
_SCHEME_RE = re.compile(r"http(s?)://", re.IGNORECASE)


@st.cache_resource
def load_classifier(model_path: str):
    """Build the text-classification pipeline for the model at *model_path*.

    The model config is loaded with explicit label names so that class
    index 0 is reported as "Safe" and class index 1 as "Unsafe".

    Decorated with ``st.cache_resource`` so Streamlit can reuse the returned
    pipeline instead of rebuilding it each time the script is re-executed.
    """
    id2label = {0: "Safe", 1: "Unsafe"}
    label2id = {"Safe": 0, "Unsafe": 1}
    config = AutoConfig.from_pretrained(
        model_path, id2label=id2label, label2id=label2id
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path, config=config
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


def defang_url(url: str) -> str:
    """Return *url* rewritten so that it is no longer clickable.

    Every ``http://`` / ``https://`` scheme (matched case-insensitively,
    wherever it occurs in the string) becomes ``hxxp://`` / ``hxxps://``,
    and every period becomes ``[.]``.

    For example:
        https://example.com --> hxxps://example[.]com
        HTTP://example.com  --> hxxp://example[.]com
    """
    # Rewrite schemes first; the replacement contains no periods, so the
    # period pass below cannot interfere with it.
    without_scheme = _SCHEME_RE.sub(
        lambda match: f"hxxp{match.group(1).lower()}://", url
    )
    return without_scheme.replace(".", "[.]")


st.title("URL Typosquatting Detection with URLGuardian")
st.markdown(
    "This app uses the **URLGuardian** classifier developed by Anvilogic to detect potential suspicious URL. "
    "Enter a URL to assess!"
)

model_path = "./URLGuardian"
classifier = load_classifier(model_path)

# Strip surrounding whitespace so pasted input with stray spaces/newlines is
# classified as the URL itself, and whitespace-only input counts as empty.
url = st.text_input("Enter the URL:", value="example.com").strip()

if st.button("Check Safety of the url"):
    if url:
        # The pipeline returns a list of results; take the one for this input.
        result = classifier(url)[0]
        label = result["label"]
        score = result["score"]
        # Only the defanged form is echoed back to the user.
        defanged_url = defang_url(url)
        if label == "Safe":
            st.success(
                f"The URL '{defanged_url}' is considered safe with a confidence of {score * 100:.2f}%."
            )
        else:
            st.error(
                f"The URL '{defanged_url}' is considered suspicious with a confidence of {score * 100:.2f}%."
            )
        # Optionally, you can display the full result for debugging purposes:
        st.write("Full classification output:", result)
    else:
        st.error("Please enter a URL.")