LMartinezEXEX committed on
Commit
0c56fea
·
1 Parent(s): e7eeec5

Added other approaches to load embeddings from files with .vec and .bin extensions

Browse files
Files changed (1) hide show
  1. modules/model_embbeding.py +44 -5
modules/model_embbeding.py CHANGED
@@ -3,6 +3,7 @@ from memory_profiler import profile
3
  from sklearn.neighbors import NearestNeighbors
4
  from sklearn.decomposition import PCA
5
  from gensim.models import KeyedVectors
 
6
  from typing import List, Any
7
  import os
8
  import pandas as pd
@@ -89,12 +90,50 @@ class Embedding:
89
  pca = PCA(
90
  n_components=2
91
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- model = KeyedVectors.load_word2vec_format(
94
- fname=path,
95
- binary=path.endswith('.bin'),
96
- limit=limit
97
- )
98
 
99
  # Cased Vocab
100
  cased_words = model.index_to_key
 
3
  from sklearn.neighbors import NearestNeighbors
4
  from sklearn.decomposition import PCA
5
  from gensim.models import KeyedVectors
6
+ from gensim.models.fasttext import load_facebook_vectors
7
  from typing import List, Any
8
  import os
9
  import pandas as pd
 
90
  pca = PCA(
91
  n_components=2
92
  )
93
+
94
+ model = None
95
+ binary = path.endswith('.bin')
96
+
97
+ # Should be enough for all .vec files
98
+ try:
99
+ model = KeyedVectors.load_word2vec_format(
100
+ fname=path,
101
+ binary=binary,
102
+ limit=limit,
103
+ unicode_errors='ignore'
104
+ )
105
+
106
+ except UnicodeDecodeError:
107
+ pass #Try other way of loading
108
+
109
+ # If it's a .bin Fasttext saved model
110
+ if model is None and binary:
111
+ print('Error during load of provided model. \
112
+ Using different approaches. \
113
+ The limit parameter won\'t be taken into account'
114
+ )
115
+
116
+ # If it's a Fasttext model
117
+ try:
118
+ model = load_facebook_vectors(
119
+ path=path
120
+ )
121
+
122
+ except UnicodeDecodeError:
123
+ pass
124
+
125
+ if model is None:
126
+ # Last chance, if it is a Word2Vec model
127
+ try:
128
+ model = KeyedVectors.load(
129
+ fname=path
130
+ )
131
+
132
+ except:
133
+ pass
134
 
135
+ if model is None:
136
+ raise Exception(f'Can\'t load {path} after multiple approaches.')
 
 
 
137
 
138
  # Cased Vocab
139
  cased_words = model.index_to_key