Files changed (1) hide show
  1. app.py +18 -15
app.py CHANGED
@@ -1,23 +1,26 @@
1
  import streamlit as st
2
  from sentence_transformers import CrossEncoder
3
 
 
 
 
 
 
4
  # Title and instructions
5
- st.title("Typosquatting Detection App")
 
 
 
 
 
6
  st.write("Enter two domains to check if one is a typosquatted variant of the other.")
7
 
8
- # Model selection
9
- # model_choice = st.selectbox("Choose a model for detection:", ["CE-typosquat-detect-Canine", "CE-typosquat-detect"])
10
-
11
- # # Load model after selection
12
- # if model_choice:
13
- # model_path = f"./{model_choice}"
14
- # model = CrossEncoder(model_path)
15
  model_choice="CE-typosquat-detect-Canine"
16
  model_path = f"./{model_choice}"
17
- model = CrossEncoder(model_path)
18
- domain = st.text_input("Enter the legitimate domain name:")
19
- typosquat = st.text_input("Enter the potentially typosquatted domain name:")
20
- threshold = st.slider("Set detection threshold", 0.0, 1.0, 0.5)
21
 
22
  # Typosquatting detection on button click
23
  if st.button("Check Typosquatting"):
@@ -26,9 +29,9 @@ if st.button("Check Typosquatting"):
26
  prediction = model.predict(inputs)[0]
27
 
28
  # Display result
29
- if prediction > threshold:
30
- st.success(f"The model predicts that '{typosquat}' is likely a typosquatted version of '{domain}' with a score of {prediction:.4f}.")
31
  else:
32
- st.warning(f"The model predicts that '{typosquat}' is NOT likely a typosquatted version of '{domain}' with a score of {prediction:.4f}.")
33
  else:
34
  st.error("Please enter both a legitimate domain and a potentially typosquatted domain.")
 
1
  import streamlit as st
2
  from sentence_transformers import CrossEncoder
3
 
4
+ @st.cache_resource
5
+ def load_model(model_path) -> CrossEncoder:
6
+ return CrossEncoder(model_path)
7
+
8
+
9
  # Title and instructions
10
+ st.title("Typosquatting Detection using CrossEncoders")
11
+ st.markdown("Nowadays LLMs might feel like the reflexive first choice to solve tasks like typosquatting that require "
12
+ "some reasoning capability to determine if one domain is spelled in such a way to look like another. "
13
+ "What we found was that we could fine-tune an encoder-decoder model, but CrossEncoders performed equally well "
14
+ "with a smaller footprint in size and complexity. CrossEncoders were originally built to compare two sentences "
15
+ "at the same time. Here we use the same technique to compare two domains simultaneously.")
16
  st.write("Enter two domains to check if one is a typosquatted variant of the other.")
17
 
 
 
 
 
 
 
 
18
  model_choice="CE-typosquat-detect-Canine"
19
  model_path = f"./{model_choice}"
20
+ model = load_model(model_path)
21
+ domain = st.text_input("Enter the legitimate domain name:", value="office365.com")
22
+ typosquat = st.text_input("Enter the potentially typosquatted domain name:", value="0ffice356.co")
23
+
24
 
25
  # Typosquatting detection on button click
26
  if st.button("Check Typosquatting"):
 
29
  prediction = model.predict(inputs)[0]
30
 
31
  # Display result
32
+ if prediction > 0.5:
33
+ st.success(f"The model predicts that '{typosquat}' is likely a typosquatted version of '{domain}' with a score of {prediction * 100:.2f} out of 100.")
34
  else:
35
+ st.warning(f"The model predicts that '{typosquat}' is NOT likely a typosquatted version of '{domain}' with a score of {prediction * 100:.2f} out of 100.")
36
  else:
37
  st.error("Please enter both a legitimate domain and a potentially typosquatted domain.")