web-phishing-detection

Sleeping

rmdhirr committed on Jun 16, 2024

Commit

e2e2b90

verified ·

1 Parent(s): 90e22e2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# CODE BY RAMADHIRRA! DON'T USE WITHOUT PERMISSION!
 import gradio as gr
 import tensorflow as tf
 import numpy as np
@@ -26,12 +25,20 @@ nltk.download('wordnet')
 STOPWORDS = set(stopwords.words('english'))
 lemmatizer = WordNetLemmatizer()
 def preprocess_url(url):
     url = url.lower()
     url = re.sub(r'https?://', '', url)
     url = re.sub(r'www\.', '', url)
     url = re.sub(r'[^a-zA-Z0-9]', ' ', url)
     url = re.sub(r'\s+', ' ', url).strip()
     tokens = word_tokenize(url)
     tokens = [word for word in tokens if word not in STOPWORDS]
     tokens = [lemmatizer.lemmatize(word) for word in tokens]

 import gradio as gr
 import tensorflow as tf
 import numpy as np
 STOPWORDS = set(stopwords.words('english'))
 lemmatizer = WordNetLemmatizer()
+def normalize_length(url, target_length=50):  # pad or truncate url to target_length characters
+    if len(url) < target_length:
+        url = url + " " * (target_length - len(url))  # too short: right-pad with spaces
+    else:
+        url = url[:target_length]  # otherwise keep only the first target_length characters
+    return url
 def preprocess_url(url):
     url = url.lower()
     url = re.sub(r'https?://', '', url)
     url = re.sub(r'www\.', '', url)
     url = re.sub(r'[^a-zA-Z0-9]', ' ', url)
     url = re.sub(r'\s+', ' ', url).strip()
+    url = normalize_length(url)
     tokens = word_tokenize(url)
     tokens = [word for word in tokens if word not in STOPWORDS]
     tokens = [lemmatizer.lemmatize(word) for word in tokens]