Spaces:
Sleeping
Sleeping
HaggiVaggi
committed on
Commit
•
cc18cb2
1
Parent(s):
5bd5beb
Update pages/Подбор фильмов по описанию✏️🔍.py
Browse files
pages/Подбор фильмов по описанию✏️🔍.py
CHANGED
@@ -21,19 +21,14 @@ def load_model():
|
|
21 |
model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased-sentence")
|
22 |
return model
|
23 |
|
24 |
-
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased-sentence")
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
tokens = _tokenizer(description, return_tensors="pt")
|
29 |
-
with torch.no_grad():
|
30 |
-
outputs = _model(**tokens)
|
31 |
-
embeddings = outputs.last_hidden_state.mean(dim=1)
|
32 |
-
return embeddings.cpu().numpy().astype('float32')
|
33 |
|
34 |
st.header("Подбор фильмов по описанию ✏️🔍")
|
35 |
|
36 |
# Загрузка данных
|
|
|
37 |
df = load_data('data/final_data.csv')
|
38 |
embeddings_array, index = embedding_and_index()
|
39 |
model = load_model()
|
@@ -43,7 +38,12 @@ user_input = st.text_input("Введите описание фильма:", valu
|
|
43 |
|
44 |
if st.button("Искать🔍🎦"):
|
45 |
if user_input:
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
47 |
# Векторизация введенного запроса с использованием переданных tokenizer и model
|
48 |
input_embedding = encode_description(user_input, tokenizer, model)
|
49 |
|
|
|
21 |
model = AutoModel.from_pretrained("DeepPavlov/rubert-base-cased-sentence")
|
22 |
return model
|
23 |
|
|
|
24 |
|
25 |
+
|
26 |
+
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
st.header("Подбор фильмов по описанию ✏️🔍")
|
29 |
|
30 |
# Загрузка данных
|
31 |
+
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased-sentence")
|
32 |
df = load_data('data/final_data.csv')
|
33 |
embeddings_array, index = embedding_and_index()
|
34 |
model = load_model()
|
|
|
38 |
|
39 |
if st.button("Искать🔍🎦"):
|
40 |
if user_input:
|
41 |
+
def encode_description(description, tokenizer, model):
|
42 |
+
tokens = tokenizer(description, return_tensors="pt")
|
43 |
+
with torch.no_grad():
|
44 |
+
outputs = model(**tokens)
|
45 |
+
embeddings = outputs.last_hidden_state.mean(dim=1)
|
46 |
+
return embeddings.cpu().numpy().astype('float32')
|
47 |
# Векторизация введенного запроса с использованием переданных tokenizer и model
|
48 |
input_embedding = encode_description(user_input, tokenizer, model)
|
49 |
|