Update app.py
Browse files
app.py
CHANGED
@@ -4,28 +4,23 @@ import pandas as pd
|
|
4 |
import numpy as np
|
5 |
from ast import literal_eval
|
6 |
|
7 |
-
|
|
|
8 |
model_choice = st.selectbox("Select the embedding model:", ["", "Embedder-typosquat-detect-Canine", "Embedder-typosquat-detect"], index=0)
|
9 |
|
10 |
-
# Load the model only if a model is selected
|
11 |
if model_choice:
|
12 |
model = SentenceTransformer(f"./{model_choice}")
|
13 |
|
14 |
-
# Load the domains and embeddings
|
15 |
domains_df = pd.read_csv(f'./{model_choice}/domains_embs.csv')
|
16 |
domains_df.embedding = domains_df.embedding.apply(literal_eval)
|
17 |
corpus_domains = domains_df.domain.to_list()
|
18 |
corpus_embeddings = np.stack(domains_df.embedding.values).astype(np.float32) # Ensure embeddings are float32
|
19 |
|
20 |
-
# Streamlit App
|
21 |
-
st.title("Mining Potential Legitimate Domains from a Typosquatted Domain")
|
22 |
st.write("Enter a potential typosquatted domain and select the number of top results to retrieve.")
|
23 |
|
24 |
-
# User Inputs
|
25 |
domain = st.text_input("Potential Typosquatted Domain")
|
26 |
top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=5, step=1)
|
27 |
|
28 |
-
# Button to trigger search
|
29 |
if st.button("Search for Legitimate Domains"):
|
30 |
if domain:
|
31 |
# Perform Semantic Search
|
@@ -33,12 +28,10 @@ if model_choice:
|
|
33 |
semantic_res = util.semantic_search(query_emb, corpus_embeddings, top_k=top_k)[0]
|
34 |
ids = [r['corpus_id'] for r in semantic_res]
|
35 |
scores = [r['score'] for r in semantic_res]
|
36 |
-
|
37 |
-
# Create a DataFrame for the results
|
38 |
res_df = domains_df.loc[ids, ['domain']].copy()
|
39 |
res_df['score'] = scores
|
40 |
|
41 |
-
# Display the result DataFrame
|
42 |
st.write("Mined Domains:")
|
43 |
st.dataframe(res_df)
|
44 |
else:
|
|
|
4 |
import numpy as np
|
5 |
from ast import literal_eval
|
6 |
|
7 |
+
|
8 |
+
st.title("Mining Potential Legitimate Domains from a Typosquatted Domain")
|
9 |
model_choice = st.selectbox("Select the embedding model:", ["", "Embedder-typosquat-detect-Canine", "Embedder-typosquat-detect"], index=0)
|
10 |
|
|
|
11 |
if model_choice:
|
12 |
model = SentenceTransformer(f"./{model_choice}")
|
13 |
|
|
|
14 |
domains_df = pd.read_csv(f'./{model_choice}/domains_embs.csv')
|
15 |
domains_df.embedding = domains_df.embedding.apply(literal_eval)
|
16 |
corpus_domains = domains_df.domain.to_list()
|
17 |
corpus_embeddings = np.stack(domains_df.embedding.values).astype(np.float32) # Ensure embeddings are float32
|
18 |
|
|
|
|
|
19 |
st.write("Enter a potential typosquatted domain and select the number of top results to retrieve.")
|
20 |
|
|
|
21 |
domain = st.text_input("Potential Typosquatted Domain")
|
22 |
top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=5, step=1)
|
23 |
|
|
|
24 |
if st.button("Search for Legitimate Domains"):
|
25 |
if domain:
|
26 |
# Perform Semantic Search
|
|
|
28 |
semantic_res = util.semantic_search(query_emb, corpus_embeddings, top_k=top_k)[0]
|
29 |
ids = [r['corpus_id'] for r in semantic_res]
|
30 |
scores = [r['score'] for r in semantic_res]
|
31 |
+
|
|
|
32 |
res_df = domains_df.loc[ids, ['domain']].copy()
|
33 |
res_df['score'] = scores
|
34 |
|
|
|
35 |
st.write("Mined Domains:")
|
36 |
st.dataframe(res_df)
|
37 |
else:
|