Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# CODE BY RAMADHIRRA! DON'T USE WITHOUT PERMISSION!
|
2 |
import gradio as gr
|
3 |
import tensorflow as tf
|
4 |
import numpy as np
|
@@ -26,12 +25,20 @@ nltk.download('wordnet')
|
|
26 |
STOPWORDS = set(stopwords.words('english'))
|
27 |
lemmatizer = WordNetLemmatizer()
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def preprocess_url(url):
|
30 |
url = url.lower()
|
31 |
url = re.sub(r'https?://', '', url)
|
32 |
url = re.sub(r'www\.', '', url)
|
33 |
url = re.sub(r'[^a-zA-Z0-9]', ' ', url)
|
34 |
url = re.sub(r'\s+', ' ', url).strip()
|
|
|
35 |
tokens = word_tokenize(url)
|
36 |
tokens = [word for word in tokens if word not in STOPWORDS]
|
37 |
tokens = [lemmatizer.lemmatize(word) for word in tokens]
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import tensorflow as tf
|
3 |
import numpy as np
|
|
|
25 |
STOPWORDS = set(stopwords.words('english'))
|
26 |
lemmatizer = WordNetLemmatizer()
|
27 |
|
28 |
+
def normalize_length(url, target_length=50):
    """Force *url* to exactly ``target_length`` characters.

    Shorter strings are right-padded with spaces; longer strings are
    truncated. The result always has length ``target_length``.
    """
    # ljust pads with spaces only when the string is shorter; the slice
    # then truncates anything longer — one expression, same contract.
    return url.ljust(target_length)[:target_length]
|
34 |
+
|
35 |
def preprocess_url(url):
|
36 |
url = url.lower()
|
37 |
url = re.sub(r'https?://', '', url)
|
38 |
url = re.sub(r'www\.', '', url)
|
39 |
url = re.sub(r'[^a-zA-Z0-9]', ' ', url)
|
40 |
url = re.sub(r'\s+', ' ', url).strip()
|
41 |
+
url = normalize_length(url)
|
42 |
tokens = word_tokenize(url)
|
43 |
tokens = [word for word in tokens if word not in STOPWORDS]
|
44 |
tokens = [lemmatizer.lemmatize(word) for word in tokens]
|