HaggiVaggi committed on
Commit
071f0b8
·
1 Parent(s): 38972c8

Update pages/Подбор фильмов по описанию✏️🔍.py

Browse files
pages/Подбор фильмов по описанию✏️🔍.py CHANGED
@@ -5,6 +5,37 @@ from transformers import AutoTokenizer, AutoModel
5
  import faiss
6
  import numpy as np
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  st.header("Подбор фильмов по описанию ✏️🔍")
9
 
10
 
 
5
  import faiss
6
  import numpy as np
7
 
8
@st.cache_data
def load_data(url):
    """Read the CSV at ``url`` into a pandas DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    parsed table and reuses it for repeated calls with the same argument.

    Args:
        url: Path or URL understood by ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    return pd.read_csv(url)


# Dataset used by this page, loaded through the cached helper.
df = load_data('data/final_data.csv')
14
+
15
# cache_resource (not cache_data) is used on purpose: cache_data pickles the
# return value and hands back a fresh copy on every call, which is wasteful
# for a large embedding matrix and requires the FAISS index to be
# serializable. cache_resource stores the objects themselves and returns the
# same instances, so callers must treat them as read-only.
@st.cache_resource
def embedding_and_index():
    """Load the precomputed embeddings and the FAISS index from disk.

    Returns:
        A ``(embeddings_array, index)`` tuple: the array loaded from
        ``data/embeddings_final.npy`` and the index read from
        ``data/desc_faiss_index_final.index``.
    """
    embeddings_array = np.load('data/embeddings_final.npy')
    index = faiss.read_index('data/desc_faiss_index_final.index')
    return embeddings_array, index


embeddings_array, index = embedding_and_index()
22
+
23
@st.cache_resource
def load_tokenizer_and_model():
    """Instantiate the sentence-encoder tokenizer and model.

    Both objects are created from the same Hugging Face checkpoint and are
    kept by ``st.cache_resource``, so they are built once and then reused.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "DeepPavlov/rubert-base-cased-sentence"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )


tokenizer, model = load_tokenizer_and_model()
30
+
31
# NOTE(review): Streamlit builds the cache key by hashing every argument,
# including ``tokenizer`` and ``model``. Its docs suggest underscore-prefixed
# parameter names to exclude such objects from hashing; that would rename
# parameters, so it is left for a follow-up - confirm against callers.
@st.cache_resource
def encode_description(description, tokenizer, model):
    """Encode a text description into a single float32 embedding.

    The text is tokenized, passed through the model without gradient
    tracking, and the last hidden states are averaged over dimension 1
    (the token axis of Hugging Face model outputs).

    Args:
        description: Text to encode.
        tokenizer: Hugging Face tokenizer matching ``model``.
        model: Hugging Face encoder exposing ``last_hidden_state``.

    Returns:
        A NumPy ``float32`` array holding the mean-pooled embedding.
    """
    # Without truncation, a description longer than the model's positional
    # limit makes the forward pass fail. Cap the input at that limit, taken
    # from the model config, with the usual BERT value as a fallback.
    max_len = getattr(
        getattr(model, "config", None), "max_position_embeddings", 512
    )
    tokens = tokenizer(
        description,
        return_tensors="pt",
        truncation=True,
        max_length=max_len,
    )
    with torch.no_grad():
        outputs = model(**tokens)
        embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings.cpu().numpy().astype('float32')
38
+
39
  st.header("Подбор фильмов по описанию ✏️🔍")
40
 
41