Spaces:
Running
Running
add python-version, update gitignore, update vectorizer to use cached gensim-data if available
Browse files
.gitignore
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
.DS_Store
|
2 |
-
.env
|
|
|
|
|
|
1 |
.DS_Store
|
2 |
+
.env
|
3 |
+
__pycache__/*
|
4 |
+
gensim-data/*
|
.python-version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.11.13
|
__pycache__/asl_gloss.cpython-311.pyc
DELETED
Binary file (14.3 kB)
|
|
__pycache__/document_parsing.cpython-311.pyc
DELETED
Binary file (15.2 kB)
|
|
__pycache__/document_parsing.cpython-313.pyc
DELETED
Binary file (10.6 kB)
|
|
__pycache__/vectorizer.cpython-311.pyc
DELETED
Binary file (7.07 kB)
|
|
vectorizer.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import gensim
|
2 |
import gensim.downloader
|
|
|
3 |
import numpy as np
|
4 |
import pandas as pd
|
5 |
import os
|
@@ -18,8 +19,16 @@ class Vectorizer:
|
|
18 |
"""
|
19 |
Returns a KeyedVector object loaded from gensim
|
20 |
"""
|
|
|
21 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
kv = gensim.downloader.load(model_name) # returns a keyedvector
|
|
|
23 |
return kv
|
24 |
except Exception as e:
|
25 |
print(f"Unable to load embedding model from gensim: {e}")
|
|
|
1 |
import gensim
|
2 |
import gensim.downloader
|
3 |
+
from gensim.models import KeyedVectors
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
6 |
import os
|
|
|
19 |
"""
|
20 |
Returns a KeyedVector object loaded from gensim
|
21 |
"""
|
22 |
+
model_path = os.path.join(os.getcwd(), 'gensim-data', 'GoogleNews-vectors-negative300.bin.gz')
|
23 |
try:
|
24 |
+
print(f"Loading model from {model_path}")
|
25 |
+
kv = KeyedVectors.load_word2vec_format(model_path, binary=True)
|
26 |
+
print("Word2Vec model loaded successfully as KeyedVectors object.")
|
27 |
+
return kv
|
28 |
+
except FileNotFoundError:
|
29 |
+
print(f"Error: Model file not found at {model_path}. Trying to download...")
|
30 |
kv = gensim.downloader.load(model_name) # returns a keyedvector
|
31 |
+
print("Word2Vec model loaded successfully as KeyedVectors object.")
|
32 |
return kv
|
33 |
except Exception as e:
|
34 |
print(f"Unable to load embedding model from gensim: {e}")
|