Spaces:
Sleeping
Sleeping
Commit
·
071f0b8
1
Parent(s):
38972c8
Update pages/Подбор фильмов по описанию✏️🔍.py
Browse files
pages/Подбор фильмов по описанию✏️🔍.py
CHANGED
@@ -5,6 +5,37 @@ from transformers import AutoTokenizer, AutoModel
|
|
5 |
import faiss
|
6 |
import numpy as np
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
# Render the page heading (the Russian title, with emoji, is user-facing text).
st.header("Подбор фильмов по описанию ✏️🔍")
|
9 |
|
10 |
|
|
|
5 |
import faiss
|
6 |
import numpy as np
|
7 |
|
8 |
+
@st.cache_data
def load_data(url):
    """Read the CSV located at ``url`` and return it as a DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    result instead of re-reading the file on every script rerun.

    Args:
        url: Path or URL accepted by ``pd.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(url)
|
12 |
+
|
13 |
+
# Load the dataset at module level; load_data is Streamlit-cached, so reruns
# of the script reuse the cached result.
df = load_data('data/final_data.csv')
|
14 |
+
|
15 |
+
# st.cache_resource (rather than st.cache_data) is used on purpose:
# cache_data serializes the return value and hands back a fresh copy on every
# call, which is wasteful for a large array plus a FAISS index.
# cache_resource keeps the loaded objects themselves and returns them as-is.
@st.cache_resource
def embedding_and_index():
    """Load the saved embeddings array and FAISS index from ``data/``.

    Returns:
        A ``(embeddings_array, index)`` tuple: the NumPy array stored in
        ``data/embeddings_final.npy`` and the FAISS index read from
        ``data/desc_faiss_index_final.index``.

    Note:
        The returned objects are shared between reruns and sessions, so
        callers should treat them as read-only.
    """
    embeddings_array = np.load('data/embeddings_final.npy')
    index = faiss.read_index('data/desc_faiss_index_final.index')
    return embeddings_array, index
|
20 |
+
|
21 |
+
# Load the embeddings array and FAISS index at module level; the loader is
# Streamlit-cached, so the files are not re-read on every rerun.
embeddings_array, index = embedding_and_index()
|
22 |
+
|
23 |
+
@st.cache_resource
def load_tokenizer_and_model():
    """Load the tokenizer and model for the sentence-encoder checkpoint.

    Both objects come from the same Hugging Face checkpoint name. The
    function is wrapped in ``st.cache_resource``, so they are created once
    and reused across reruns.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "DeepPavlov/rubert-base-cased-sentence"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )
|
28 |
+
|
29 |
+
# Load the tokenizer and model at module level via the cached loader so they
# are available to the rest of the page.
tokenizer, model = load_tokenizer_and_model()
|
30 |
+
|
31 |
+
# NOTE(review): Streamlit hashes every argument to build the cache key,
# including `tokenizer` and `model`. Hashing such objects may be slow or
# unsupported; the documented way to skip an argument is to prefix its name
# with an underscore. Parameter names are kept as-is here so existing callers
# are not affected. Confirm, and consider st.cache_data since the return
# value is plain array data that cache_resource shares between callers.
@st.cache_resource
def encode_description(description, tokenizer, model):
    """Encode a text description into a single float32 embedding.

    The text is tokenized, passed through the model without gradient
    tracking, and the model's ``last_hidden_state`` is averaged over
    dimension 1 to get one vector per input.

    Args:
        description: Text to encode.
        tokenizer: Tokenizer callable that returns PyTorch tensors.
        model: Model whose output exposes ``last_hidden_state``.

    Returns:
        A ``numpy.ndarray`` of dtype ``float32`` with the pooled embedding.
    """
    # Truncate over-long input to the model's position limit; without this a
    # long description makes the forward pass fail instead of being encoded.
    # If the config does not expose the limit, max_length=None lets the
    # tokenizer fall back to its own configured maximum.
    max_length = getattr(model.config, "max_position_embeddings", None)
    tokens = tokenizer(
        description,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )
    with torch.no_grad():
        outputs = model(**tokens)
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings.cpu().numpy().astype('float32')
|
38 |
+
|
39 |
# Render the page heading (the Russian title, with emoji, is user-facing text).
st.header("Подбор фильмов по описанию ✏️🔍")
|
40 |
|
41 |
|