chgrdj committed on
Commit
ec88281
·
verified ·
1 Parent(s): a73e92a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -10
app.py CHANGED
@@ -4,28 +4,23 @@ import pandas as pd
4
  import numpy as np
5
  from ast import literal_eval
6
 
7
- # Dropdown to select the model
 
8
  model_choice = st.selectbox("Select the embedding model:", ["", "Embedder-typosquat-detect-Canine", "Embedder-typosquat-detect"], index=0)
9
 
10
- # Load the model only if a model is selected
11
  if model_choice:
12
  model = SentenceTransformer(f"./{model_choice}")
13
 
14
- # Load the domains and embeddings
15
  domains_df = pd.read_csv(f'./{model_choice}/domains_embs.csv')
16
  domains_df.embedding = domains_df.embedding.apply(literal_eval)
17
  corpus_domains = domains_df.domain.to_list()
18
  corpus_embeddings = np.stack(domains_df.embedding.values).astype(np.float32) # Ensure embeddings are float32
19
 
20
- # Streamlit App
21
- st.title("Mining Potential Legitimate Domains from a Typosquatted Domain")
22
  st.write("Enter a potential typosquatted domain and select the number of top results to retrieve.")
23
 
24
- # User Inputs
25
  domain = st.text_input("Potential Typosquatted Domain")
26
  top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=5, step=1)
27
 
28
- # Button to trigger search
29
  if st.button("Search for Legitimate Domains"):
30
  if domain:
31
  # Perform Semantic Search
@@ -33,12 +28,10 @@ if model_choice:
33
  semantic_res = util.semantic_search(query_emb, corpus_embeddings, top_k=top_k)[0]
34
  ids = [r['corpus_id'] for r in semantic_res]
35
  scores = [r['score'] for r in semantic_res]
36
-
37
- # Create a DataFrame for the results
38
  res_df = domains_df.loc[ids, ['domain']].copy()
39
  res_df['score'] = scores
40
 
41
- # Display the result DataFrame
42
  st.write("Mined Domains:")
43
  st.dataframe(res_df)
44
  else:
 
4
  import numpy as np
5
  from ast import literal_eval
6
 
7
+
8
+ st.title("Mining Potential Legitimate Domains from a Typosquatted Domain")
9
  model_choice = st.selectbox("Select the embedding model:", ["", "Embedder-typosquat-detect-Canine", "Embedder-typosquat-detect"], index=0)
10
 
 
11
  if model_choice:
12
  model = SentenceTransformer(f"./{model_choice}")
13
 
 
14
  domains_df = pd.read_csv(f'./{model_choice}/domains_embs.csv')
15
  domains_df.embedding = domains_df.embedding.apply(literal_eval)
16
  corpus_domains = domains_df.domain.to_list()
17
  corpus_embeddings = np.stack(domains_df.embedding.values).astype(np.float32) # Ensure embeddings are float32
18
 
 
 
19
  st.write("Enter a potential typosquatted domain and select the number of top results to retrieve.")
20
 
 
21
  domain = st.text_input("Potential Typosquatted Domain")
22
  top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=5, step=1)
23
 
 
24
  if st.button("Search for Legitimate Domains"):
25
  if domain:
26
  # Perform Semantic Search
 
28
  semantic_res = util.semantic_search(query_emb, corpus_embeddings, top_k=top_k)[0]
29
  ids = [r['corpus_id'] for r in semantic_res]
30
  scores = [r['score'] for r in semantic_res]
31
+
 
32
  res_df = domains_df.loc[ids, ['domain']].copy()
33
  res_df['score'] = scores
34
 
 
35
  st.write("Mined Domains:")
36
  st.dataframe(res_df)
37
  else: