rmdhirr committed on
Commit
e2e2b90
·
verified ·
1 Parent(s): 90e22e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # CODE BY RAMADHIRRA! DON'T USE WITHOUT PERMISSION!
2
  import gradio as gr
3
  import tensorflow as tf
4
  import numpy as np
@@ -26,12 +25,20 @@ nltk.download('wordnet')
26
  STOPWORDS = set(stopwords.words('english'))
27
  lemmatizer = WordNetLemmatizer()
28
 
 
 
 
 
 
 
 
29
  def preprocess_url(url):
30
  url = url.lower()
31
  url = re.sub(r'https?://', '', url)
32
  url = re.sub(r'www\.', '', url)
33
  url = re.sub(r'[^a-zA-Z0-9]', ' ', url)
34
  url = re.sub(r'\s+', ' ', url).strip()
 
35
  tokens = word_tokenize(url)
36
  tokens = [word for word in tokens if word not in STOPWORDS]
37
  tokens = [lemmatizer.lemmatize(word) for word in tokens]
 
 
1
  import gradio as gr
2
  import tensorflow as tf
3
  import numpy as np
 
25
  STOPWORDS = set(stopwords.words('english'))
26
  lemmatizer = WordNetLemmatizer()
27
 
28
+ def normalize_length(url, target_length=50):
29
+ if len(url) < target_length:
30
+ url = url + " " * (target_length - len(url))
31
+ else:
32
+ url = url[:target_length]
33
+ return url
34
+
35
  def preprocess_url(url):
36
  url = url.lower()
37
  url = re.sub(r'https?://', '', url)
38
  url = re.sub(r'www\.', '', url)
39
  url = re.sub(r'[^a-zA-Z0-9]', ' ', url)
40
  url = re.sub(r'\s+', ' ', url).strip()
41
+ url = normalize_length(url)
42
  tokens = word_tokenize(url)
43
  tokens = [word for word in tokens if word not in STOPWORDS]
44
  tokens = [lemmatizer.lemmatize(word) for word in tokens]