|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Converts a text embedding file into a binary format for quicker loading.""" |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import numpy as np |
|
import tensorflow as tf |
|
|
|
# Command-line flags. All three are string-valued and default to the empty
# string.

# Source of the embeddings, read as text by main().
tf.flags.DEFINE_string(
    'input',
    '',
    'text file containing embeddings')

# Destination for the vocabulary written by main().
tf.flags.DEFINE_string(
    'output_vocab',
    '',
    'output file for vocabulary')

# Destination passed to np.save() by main().
tf.flags.DEFINE_string(
    'output_npy',
    '',
    'output file for binary')

FLAGS = tf.flags.FLAGS
|
|
|
def main(_):
  """Converts the text embeddings in FLAGS.input to vocab and .npy outputs.

  Every non-blank line of the input is split on whitespace: the first field
  is taken as the token and each remaining field is parsed as a float. The
  tokens are written to FLAGS.output_vocab, one per line, and the vectors are
  saved to FLAGS.output_npy with np.save as a float32 array.

  Args:
    _: Unused positional argument.

  Raises:
    ValueError: If a vector field cannot be parsed as a float, or if a line
      has a different number of vector components than the first entry.
  """
  vocab = []
  vecs = []
  dim = None
  with tf.gfile.GFile(FLAGS.input) as fh:
    for line_no, line in enumerate(fh, 1):
      # split() with no arguments already discards surrounding whitespace.
      parts = line.split()
      if not parts:
        # Blank or whitespace-only lines (e.g. a trailing empty line) hold no
        # entry; indexing parts[0] on them would raise IndexError.
        continue
      values = [float(x) for x in parts[1:]]
      if dim is None:
        dim = len(values)
      elif len(values) != dim:
        # Fail here, before anything is written, with a message that points
        # at the offending line instead of an opaque error from np.array.
        raise ValueError(
            '%s:%d: expected %d vector components but found %d'
            % (FLAGS.input, line_no, dim, len(values)))
      vocab.append(parts[0])
      vecs.append(values)

  with tf.gfile.GFile(FLAGS.output_vocab, 'w') as fh:
    # One token per line, including a newline after the last token.
    fh.write('\n'.join(vocab))
    fh.write('\n')

  vecs = np.array(vecs, dtype=np.float32)
  # allow_pickle=False keeps the output a plain numeric array file.
  np.save(FLAGS.output_npy, vecs, allow_pickle=False)
|
|
|
|
|
if __name__ == '__main__':
  # Script entry point: let TensorFlow's app runner invoke main().
  tf.app.run(main)
|
|