asynchronousai committed
Commit f621a6c · verified · 1 Parent(s): 4d7bc75

Update app.py

Files changed (1)
app.py +27 -20
app.py CHANGED
@@ -2,22 +2,11 @@ import gradio as gr
 import io
 import numpy as np
 
-def load_vectors(fname):
-    fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
-    data = {}
-    for line in fin:
-        tokens = line.rstrip().split(' ')
-        data[tokens[0]] = np.array(list(map(float, tokens[1:]))) # Convert to NumPy array
-    del fin
-    return data, sorted(data.keys(), key=len, reverse=True)
-
-vectors, sorted_vector = load_vectors('wiki-news-300d-1M.vec')
-
+# Trie
 class TrieNode:
     def __init__(self):
         self.children = {}
         self.is_end_of_token = False
-
 class Trie:
     def __init__(self):
         self.root = TrieNode()
@@ -43,9 +32,18 @@ class Trie:
 
         return longest_match
 
-def word2vec(word):
-    return vectors[word]
+# Vector Loader
+def load_vectors(fname):
+    fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
+    data = {}
+    for line in fin:
+        tokens = line.rstrip().split(' ')
+        data[tokens[0]] = np.array(list(map(float, tokens[1:]))) # Convert to NumPy array
+    del fin
+    return data, sorted(data.keys(), key=len, reverse=True)
+vectors, sorted_vector = load_vectors('wiki-news-300d-1M.vec')
 
+# Tokenizer
 def tokenize(text):
     trie = Trie()
     for token in sorted_vector:
@@ -64,23 +62,32 @@ def tokenize(text):
 
     return result
 
-def paragraph2word(paragraph):
+# Interface
+def onInput(paragraph, progress = gr.Progress()):
+    progress(0, "Tokenizing...")
     tokens = tokenize(paragraph)
 
+    progress(0.1, "Initializing merged vector...")
     if not tokens: # Handle case with no tokens found
         return np.zeros(300).tolist() # Return a zero vector of appropriate dimension
 
     merged_vector = np.zeros(300) # Assuming vectors are 300-dimensional
-
+
     # Merge vectors using NumPy
-    for token in tokens:
-        vector = word2vec(token)
+    totalTokens = len(tokens)
+    for ind, token in enumerate(tokens):
+        completion = 0.7*((ind+1)/totalTokens)
+        progress(0.1 + completion, f"Merging {token}, Token #{tokens.index(token)+1}/{len(tokens)}")
+
+        vector = vectors[token]
         merged_vector += vector
 
     # Normalize
+    progress(0.9, "Normalizing...")
     merged_vector /= len(tokens)
 
+    progress(1, "Converting to list...")
     return merged_vector.tolist() # Convert back to list for output
 
-demo = gr.Interface(fn=paragraph2word, inputs="text", outputs="text")
-demo.launch()
+demo = gr.Interface(fn=onInput, inputs="text", outputs="text")
+demo.launch()
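
The hunks above elide the Trie's insert/search methods and the body of tokenize, so the greedy longest-match scheme is only implied by the context lines. Below is a minimal, self-contained sketch of how such a trie-backed tokenizer and the vector averaging fit together, with a toy 2-d vocabulary standing in for the 300-d wiki-news-300d-1M.vec table; the insert and longest_match method names are illustrative assumptions, not the elided code from app.py.

import numpy as np

# Toy stand-in for the fastText table; the real app loads 300-d vectors
# from wiki-news-300d-1M.vec and sorts the keys longest-first.
vectors = {
    "new": np.array([1.0, 0.0]),
    "york": np.array([0.0, 1.0]),
    "new york": np.array([0.5, 0.5]),
}
sorted_vector = sorted(vectors, key=len, reverse=True)

class TrieNode:
    def __init__(self):
        self.children = {}
        self.is_end_of_token = False

class Trie:
    # Illustrative methods; app.py's equivalents are elided by the diff.
    def __init__(self):
        self.root = TrieNode()

    def insert(self, token):
        # One node per character; mark where a whole vocabulary token ends.
        node = self.root
        for ch in token:
            node = node.children.setdefault(ch, TrieNode())
        node.is_end_of_token = True

    def longest_match(self, text, start):
        # Walk from `start`, remembering the last complete token passed,
        # so "new york" beats "new" when both are in the vocabulary.
        node, match = self.root, None
        for i in range(start, len(text)):
            node = node.children.get(text[i])
            if node is None:
                break
            if node.is_end_of_token:
                match = text[start:i + 1]
        return match

def tokenize(text):
    trie = Trie()
    for token in sorted_vector:
        trie.insert(token)
    result, i = [], 0
    while i < len(text):
        match = trie.longest_match(text, i)
        if match:
            result.append(match)
            i += len(match)
        else:
            i += 1  # no vocabulary token starts here; skip one character
    return result

tokens = tokenize("new york new")
print(tokens)                                    # ['new york', 'new']
merged = sum(vectors[t] for t in tokens) / len(tokens)
print(merged.tolist())                           # [0.75, 0.25]

Two small notes against the committed version: enumerate already supplies the loop position, so in onInput the f-string's tokens.index(token) rescans the list on every iteration and pins duplicate tokens to their first occurrence, whereas ind+1 gives the same label in constant time; and the gr.Progress object received through the default argument is callable, so each progress(fraction, description) call is what advances the bar in the Gradio UI.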