asynchronousai committed on
Commit 4d7bc75 · verified · 1 Parent(s): 6062294

Update app.py

Files changed (1): app.py +17 -13
app.py CHANGED
@@ -1,13 +1,16 @@
 import gradio as gr
 import io
+import numpy as np
+
 def load_vectors(fname):
     fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
     data = {}
     for line in fin:
         tokens = line.rstrip().split(' ')
-        data[tokens[0]] = map(float, tokens[1:])
+        data[tokens[0]] = np.array(list(map(float, tokens[1:])))  # Convert to NumPy array
     del fin
     return data, sorted(data.keys(), key=len, reverse=True)
+
 vectors, sorted_vector = load_vectors('wiki-news-300d-1M.vec')
 
 class TrieNode:
@@ -39,9 +42,10 @@ class Trie:
         current_pos += 1
 
     return longest_match
-
+
 def word2vec(word):
-    return list(vectors[word])
+    return vectors[word]
+
 def tokenize(text):
     trie = Trie()
     for token in sorted_vector:
@@ -59,24 +63,24 @@ def tokenize(text):
         start += 1
 
     return result
+
 def paragraph2word(paragraph):
     tokens = tokenize(paragraph)
-    mergedVector = []
+
+    if not tokens:  # Handle case with no tokens found
+        return np.zeros(300).tolist()  # Return a zero vector of appropriate dimension
+
+    merged_vector = np.zeros(300)  # Assuming vectors are 300-dimensional
 
-    # Merge vectors
+    # Merge vectors using NumPy
     for token in tokens:
         vector = word2vec(token)
-        if len(mergedVector) == 0:
-            mergedVector = vector
-        else:
-            for i in range(len(vector)):
-                mergedVector[i] += vector[i]
+        merged_vector += vector
 
     # Normalize
-    for i in range(len(mergedVector)):
-        mergedVector[i] /= len(tokens)
+    merged_vector /= len(tokens)
 
-    return mergedVector
+    return merged_vector.tolist()  # Convert back to list for output
 
 demo = gr.Interface(fn=paragraph2word, inputs="text", outputs="text")
 demo.launch()
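
The substantive fix in this commit is the switch from storing map(float, tokens[1:]) to a NumPy array: in Python 3, map() returns a single-use iterator, so the first list(vectors[word]) call exhausted it and every later lookup of the same word came back empty. Storing np.array(...) makes each vector reusable and lets the merge and normalize loops become two vectorized operations. A minimal sketch of the new averaging logic, using a hypothetical two-word, 3-dimensional vocabulary in place of the 300-dimensional wiki-news-300d-1M.vec file, and taking a token list directly rather than running the Trie tokenizer:

import numpy as np

# Hypothetical toy vocabulary; the real app loads 300-dimensional
# fastText vectors from wiki-news-300d-1M.vec.
vectors = {
    "hello": np.array([1.0, 2.0, 3.0]),
    "world": np.array([3.0, 2.0, 1.0]),
}

def average_vectors(tokens, dim=3):
    # Same shape as the committed paragraph2word: guard the empty
    # case, accumulate into a zero vector, then divide by the token
    # count to get the mean.
    if not tokens:
        return np.zeros(dim).tolist()
    merged = np.zeros(dim)
    for token in tokens:
        merged += vectors[token]  # arrays, unlike map iterators, can be re-read
    merged /= len(tokens)
    return merged.tolist()

print(average_vectors(["hello", "world"]))  # [2.0, 2.0, 2.0]

Under the old code, the second consumption of any map iterator would have produced an empty list, so repeated words silently contributed nothing; with arrays the same call yields the correct component-wise mean every time.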