Spaces:
Runtime error
Runtime error
Shchushch
committed on
Commit
·
cce9310
1
Parent(s):
3288c6b
easy
Browse files- app.py +84 -0
- requirements.txt +64 -0
app.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import random as rd
|
4 |
+
import webbrowser as wb
|
5 |
+
import numpy as np
|
6 |
+
from find import find_similar,df,lems_eng,lems_rus,clean
|
7 |
+
|
8 |
+
# Browser-tab title, icon and wide layout for the whole app.
st.set_page_config(
    page_title="Умный поиск книг",
    page_icon="📖",
    layout="wide",
    # initial_sidebar_state="expanded"
)

# Collapsible preview of the source dataframe with a column picker.
# `columns` is read again later when the result table is rendered.
with st.expander('Исходный датафрейм'):
    all_columns = list(df.columns)
    columns = st.multiselect(
        'Выберите колонки для отображения',
        options=all_columns,
        default=all_columns,
    )
    st.write(df.loc[:, columns])

st.title('Умный поиск книг')

# Search form: the query text is only submitted when the button is pressed.
# NOTE(review): `input` shadows the builtin; it is kept because the search
# handler further down reads this exact name.
with st.form(key='search_form'):
    input = st.text_input('Введите поисковый запрос', 'Пример запроса')
    search_but = st.form_submit_button('Искать')

# Page size for the result list (1..10, default 5).
items_per_page = st.number_input(
    'Количество книг на странице',
    min_value=1,
    max_value=10,
    value=5,
)
|
33 |
+
# if search_but:
|
34 |
+
# st.session_state.clicked = True
|
35 |
+
#st.toast('Уфф')
|
36 |
+
#@st.cache_data(experimental_allow_widgets=True)
|
37 |
+
def books_show(books_idx, sim, n=items_per_page):
    """Render the best search hits as a result table plus one card per book.

    Args:
        books_idx: Positional row numbers into the module-level ``df``
            (they are used with ``df.iloc``), best match first.
        sim: Similarity scores aligned element-wise with ``books_idx``.
        n: Maximum number of books to show; defaults to the value of the
            page-size widget at the moment this function is defined.
    """
    # Show only as many rows as have both an index and a score, so the
    # "sims" column assignment below can never hit a length mismatch.
    shown = min(int(n), len(books_idx), len(sim))

    # Slice first, then copy: only the displayed rows are duplicated.
    books = df.iloc[books_idx[:shown]].copy()
    # Use the `sim` argument; the previous code silently read a global.
    books['sims'] = list(sim[:shown])

    with st.expander('Датафрейм с результатами'):
        st.write(books.loc[:, list(columns) + ['sims']])

    # Iterate over the selected rows themselves instead of looking them up
    # again by label, which only worked when labels equalled positions.
    for _, book in books.iterrows():
        pic_col, text_col = st.columns([0.2, 0.8])
        st.write('---')

        # First dataframe column; presumably the book page URL -- confirm
        # against the dataset schema.
        url = book.iloc[0]
        pic_col.image(book['image_url'], use_column_width=True)
        # The href value is quoted so URLs containing "=", "&" or spaces
        # still form a valid attribute.
        pic_col.markdown(
            f'<a href="{url}" target="_blank">Ссылка на книгу</a>',
            unsafe_allow_html=True,
        )
        pic_col.markdown(f'**Степень похожести:** {book["sims"]:.4f}')

        # f-strings render a missing value as text instead of raising on
        # str + float concatenation.
        text_col.markdown(f'## {book["title"]}')
        text_col.markdown(f'**Автор:** {book["author"]}')
        text_col.markdown(f'**Жанр:** {book["genre"]}')
        text_col.markdown(f'**Аннотация:** {book["annotation"]}')
|
64 |
+
|
65 |
+
# Run the search once the form has been submitted.
if search_but:
    # A " -" marker splits the query: text before it is what to look for,
    # text after it lists words that must not occur in a book.
    neg_mark = input.find(' -')
    has_negative = neg_mark != -1

    positive_text = input[:neg_mark] if has_negative else input
    cleaned_input = clean(lems_eng(lems_rus(positive_text)))
    if has_negative:
        cleaned_neg = clean(lems_eng(lems_rus(input[neg_mark + 2:])))

    with st.spinner('Wait for it...'):
        if has_negative:
            st.markdown(f'**Лемматизированный запрос:** {cleaned_input} \n\n **Лемматизированый негативный запрос:** {cleaned_neg}')
            # Over-fetch (50 candidates) so enough hits survive filtering.
            sims, books_idx = find_similar(cleaned_input, 50)

            # Build the banned-word set once instead of re-splitting the
            # negative query for every word of every book.
            banned_words = set(cleaned_neg.split())
            keep = np.array(
                [
                    banned_words.isdisjoint(df.loc[book, 'lemmatized'].split())
                    for book in books_idx
                ],
                dtype=bool,
            )
            # Filter scores with the same mask as the indices; previously
            # only the indices were pruned, so scores no longer matched
            # the books they were shown next to.
            books_idx = np.asarray(books_idx)[keep]
            sims = np.asarray(sims)[keep]
        else:
            st.markdown(f'**Лемматизированный запрос:** {cleaned_input}')
            # NOTE(review): this branch searches with the raw query while
            # the negative branch searches with the lemmatised one --
            # confirm which form find_similar expects.
            sims, books_idx = find_similar(input)

    print(f'Похожести:\n{sims}\nИндексы:\n{books_idx}')
    books_show(books_idx, sims)
|
requirements.txt
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
altair==5.1.1
|
2 |
+
attrs==23.1.0
|
3 |
+
blinker==1.6.2
|
4 |
+
cachetools==5.3.1
|
5 |
+
certifi==2023.7.22
|
6 |
+
charset-normalizer==3.2.0
|
7 |
+
click==8.1.7
|
8 |
+
faiss-cpu==1.7.4
|
9 |
+
filelock==3.12.3
|
10 |
+
fsspec==2023.6.0
|
11 |
+
gitdb==4.0.10
|
12 |
+
GitPython==3.1.33
|
13 |
+
huggingface-hub==0.16.4
|
14 |
+
idna==3.4
|
15 |
+
importlib-metadata==6.8.0
|
16 |
+
Jinja2==3.1.2
|
17 |
+
joblib==1.3.2
|
18 |
+
jsonschema==4.19.0
|
19 |
+
jsonschema-specifications==2023.7.1
|
20 |
+
markdown-it-py==3.0.0
|
21 |
+
MarkupSafe==2.1.3
|
22 |
+
mdurl==0.1.2
|
23 |
+
mpmath==1.3.0
|
24 |
+
networkx==3.1
|
25 |
+
nltk==3.8.1
|
26 |
+
numpy==1.25.2
|
27 |
+
packaging==23.1
|
28 |
+
pandas==2.1.0
|
29 |
+
Pillow==9.5.0
|
30 |
+
protobuf==4.24.2
|
31 |
+
pyarrow==13.0.0
|
32 |
+
pydeck==0.8.0
|
33 |
+
Pygments==2.16.1
|
34 |
+
Pympler==1.0.1
|
35 |
+
pymystem3==0.2.0
|
36 |
+
python-dateutil==2.8.2
|
37 |
+
pytz==2023.3
|
38 |
+
pytz-deprecation-shim==0.1.0.post0
|
39 |
+
PyYAML==6.0.1
|
40 |
+
referencing==0.30.2
|
41 |
+
regex==2023.8.8
|
42 |
+
requests==2.31.0
|
43 |
+
rich==13.5.2
|
44 |
+
rpds-py==0.10.0
|
45 |
+
safetensors==0.3.3
|
46 |
+
six==1.16.0
|
47 |
+
smmap==5.0.0
|
48 |
+
streamlit==1.26.0
|
49 |
+
sympy==1.12
|
50 |
+
tenacity==8.2.3
|
51 |
+
tokenizers==0.13.3
|
52 |
+
toml==0.10.2
|
53 |
+
toolz==0.12.0
|
54 |
+
torch==2.0.1
|
55 |
+
tornado==6.3.3
|
56 |
+
tqdm==4.66.1
|
57 |
+
transformers==4.32.1
|
58 |
+
typing_extensions==4.7.1
|
59 |
+
tzdata==2023.3
|
60 |
+
tzlocal==4.3.1
|
61 |
+
urllib3==2.0.4
|
62 |
+
validators==0.21.2
|
63 |
+
watchdog==3.0.0
|
64 |
+
zipp==3.16.2
|