Pendrokar committed on
Commit
1824d10
·
1 Parent(s): 84c02ea

autotokenize

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -10,9 +10,10 @@ import numpy as np
10
 
11
  from torchmoji.sentence_tokenizer import SentenceTokenizer
12
  from torchmoji.model_def import torchmoji_emojis
13
- from transformers import AutoModel
14
  model_name = "Pendrokar/TorchMoji"
15
  model = AutoModel.from_pretrained(model_name)
 
16
  model_path = model
17
  vocab_path = './' + model_name + "/vocabulary.json"
18
 
@@ -22,11 +23,11 @@ def top_elements(array, k):
22
 
23
  maxlen = 30
24
 
25
- print('Tokenizing using dictionary from {}'.format(vocab_path))
26
- with open(vocab_path, 'r') as f:
27
- vocabulary = json.load(f)
28
 
29
- st = SentenceTokenizer(vocabulary, maxlen)
30
 
31
  print('Loading model from {}.'.format(model_path))
32
  model = torchmoji_emojis(model_path)
 
10
 
11
  from torchmoji.sentence_tokenizer import SentenceTokenizer
12
  from torchmoji.model_def import torchmoji_emojis
13
+ from transformers import AutoModel, AutoTokenizer
14
  model_name = "Pendrokar/TorchMoji"
15
  model = AutoModel.from_pretrained(model_name)
16
+ tokenizer = AutoTokenizer.from_pretrained(modelName)
17
  model_path = model
18
  vocab_path = './' + model_name + "/vocabulary.json"
19
 
 
23
 
24
  maxlen = 30
25
 
26
+ # print('Tokenizing using dictionary from {}'.format(vocab_path))
27
+ # with open(vocab_path, 'r') as f:
28
+ # vocabulary = json.load(f)
29
 
30
+ st = SentenceTokenizer(tokenizer.get_added_vocab(), maxlen)
31
 
32
  print('Loading model from {}.'.format(model_path))
33
  model = torchmoji_emojis(model_path)