Spaces:
Sleeping
Sleeping
import streamlit as st | |
from sentence_transformers import CrossEncoder | |
@st.cache_resource
def load_model(model_path: str) -> CrossEncoder:
    """Load the CrossEncoder stored at ``model_path``.

    Streamlit re-executes the whole script on every widget interaction, so
    the loaded model is cached with ``st.cache_resource`` and reused on
    later reruns instead of being rebuilt on each button click.

    Args:
        model_path: Path or model identifier handed to ``CrossEncoder``.

    Returns:
        The loaded ``CrossEncoder`` instance.
    """
    return CrossEncoder(model_path)
# Title and instructions
st.title("Typosquatting Detection using CrossEncoders")
st.markdown(
    "Nowadays LLMs might feel like the reflexive first choice to solve tasks like typosquatting that require "
    "some reasoning capability to determine if one domain is spelled in such a way to look like another. "
    "What we found was that we could fine tune an encoder-decoder model, but CrossEncoders performed equally as well "
    "with a smaller footprint in size and complexity. CrossEncoders were originally built to compare two sentences "
    "at the same time. Here we use the same technique to compare two domains simultaneously."
)
st.write("Enter two domains to check if one is a typosquatted variant of the other.")

# The model directory is resolved relative to the current working directory.
model_choice = "CE-typosquat-detect-Canine"
model_path = f"./{model_choice}"
model = load_model(model_path)

# Scores above this cutoff are reported as a likely typosquat.
decision_threshold = 0.5

# Surrounding whitespace is never part of a domain name. Stripping it makes
# whitespace-only input count as empty and keeps stray spaces from a paste
# out of the pair that is scored and echoed back to the user.
domain = st.text_input(
    "Enter the legitimate domain name:", value="office365.com"
).strip()
typosquat = st.text_input(
    "Enter the potentially typosquatted domain name:", value="0ffice356.co"
).strip()

# Typosquatting detection on button click
if st.button("Check Typosquatting"):
    if domain and typosquat:
        # The pair is passed as (candidate, legitimate); predict() takes a
        # list of pairs, so the single result is taken from index 0.
        inputs = [(typosquat, domain)]
        prediction = model.predict(inputs)[0]
        score_text = f"{prediction * 100:.2f}"

        # Display result
        if prediction > decision_threshold:
            st.success(
                f"The model predicts that '{typosquat}' is likely a typosquatted version of '{domain}' "
                f"with a score of {score_text} out of 100."
            )
        else:
            st.warning(
                f"The model predicts that '{typosquat}' is NOT likely a typosquatted version of '{domain}' "
                f"with a score of {score_text} out of 100."
            )
    else:
        st.error("Please enter both a legitimate domain and a potentially typosquatted domain.")