Spaces:

Anvilogic
/

CE-Typosquat-Detect

Sleeping

App Files Files Community

anvilogic-mikehart committed on Nov 20, 2024

Commit

840488b

1 Parent(s): 49d7e4e

Updating app

Browse files

Files changed (1) hide show

app.py +18 -15

app.py CHANGED Viewed

@@ -1,23 +1,26 @@
 import streamlit as st
 from sentence_transformers import CrossEncoder
 # Title and instructions
-st.title("Typosquatting Detection App")
 st.write("Enter two domains to check if one is a typosquatted variant of the other.")
-# Model selection
-# model_choice = st.selectbox("Choose a model for detection:", ["CE-typosquat-detect-Canine", "CE-typosquat-detect"])
-# # Load model after selection
-# if model_choice:
-#     model_path = f"./{model_choice}"
-#     model = CrossEncoder(model_path)
 model_choice="CE-typosquat-detect-Canine"
 model_path = f"./{model_choice}"
-model = CrossEncoder(model_path)
-domain = st.text_input("Enter the legitimate domain name:")
-typosquat = st.text_input("Enter the potentially typosquatted domain name:")
-threshold = st.slider("Set detection threshold", 0.0, 1.0, 0.5)
 # Typosquatting detection on button click
 if st.button("Check Typosquatting"):
@@ -26,9 +29,9 @@ if st.button("Check Typosquatting"):
         prediction = model.predict(inputs)[0]
         # Display result
-        if prediction > threshold:
-            st.success(f"The model predicts that '{typosquat}' is likely a typosquatted version of '{domain}' with a score of {prediction:.4f}.")
         else:
-            st.warning(f"The model predicts that '{typosquat}' is NOT likely a typosquatted version of '{domain}' with a score of {prediction:.4f}.")
     else:
         st.error("Please enter both a legitimate domain and a potentially typosquatted domain.")

 import streamlit as st
 from sentence_transformers import CrossEncoder
+@st.cache_resource
+def load_model(model_path) -> CrossEncoder:
+    """Construct a CrossEncoder from ``model_path``; ``st.cache_resource`` lets Streamlit reuse the returned object instead of rebuilding it on every script rerun."""
+    return CrossEncoder(model_path)
 # Title and instructions
+st.title("Typosquatting Detection using CrossEncoders")
+st.markdown("Nowadays LLMs might feel like the reflexive first choice to solve tasks like typosquatting that require "
+            "some reasoning capability to determine if one domain is spelled in such a way to look like another.  "
+            "What we found was that we could fine tune an encoder-decoder model, but CrossEncoders performed equally as well "
+            "with a smaller footprint in size and complexity.  CrossEncoders were originally built to compare two sentences "
+            "at the same time.  Here we use the same technique to compare two domains simultaneously.")
 st.write("Enter two domains to check if one is a typosquatted variant of the other.")
 model_choice="CE-typosquat-detect-Canine"
 model_path = f"./{model_choice}"
+model = load_model(model_path)
+domain = st.text_input("Enter the legitimate domain name:", value="office365.com")
+typosquat = st.text_input("Enter the potentially typosquatted domain name:", value="0ffice356.co")
 # Typosquatting detection on button click
 if st.button("Check Typosquatting"):
         prediction = model.predict(inputs)[0]
         # Display result
+        if prediction > 0.5:
+            st.success(f"The model predicts that '{typosquat}' is likely a typosquatted version of '{domain}' with a score of {prediction * 100:.2f} out of 100.")
         else:
+            st.warning(f"The model predicts that '{typosquat}' is NOT likely a typosquatted version of '{domain}' with a score of {prediction * 100:.2f} out of 100.")
     else:
         st.error("Please enter both a legitimate domain and a potentially typosquatted domain.")