File size: 2,862 Bytes
4f12085
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2cf659
 
4f12085
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import streamlit as st
import pandas as pd, numpy as np
from html import escape
import os
import torch
from transformers import RobertaModel, AutoTokenizer


# Decorator arguments are evaluated when `load` is defined, i.e. before any
# of the objects it returns exist, so hash_funcs cannot be keyed on those
# return values. `allow_output_mutation=True` tells st.cache not to hash the
# returned objects at all, which is what the model/tokenizer need.
@st.cache(show_spinner=False, allow_output_mutation=True)
def load():
    """Load every resource the app needs, once per Streamlit cache entry.

    Reads the pretrained ``'SajjadAyoubi/clip-fa-text'`` text encoder and
    tokenizer, plus ``data.csv`` and ``embeddings.npy`` via relative paths.

    Returns:
        A 4-tuple ``(text_encoder, tokenizer, df, image_embeddings)`` where
        ``df`` is the DataFrame read from ``data.csv`` and
        ``image_embeddings`` is the array returned by ``np.load``.
    """
    text_encoder = RobertaModel.from_pretrained('SajjadAyoubi/clip-fa-text')
    tokenizer = AutoTokenizer.from_pretrained('SajjadAyoubi/clip-fa-text')
    df = pd.read_csv('data.csv')
    image_embeddings = np.load('embeddings.npy')
    return text_encoder, tokenizer, df, image_embeddings


# Module-level resources read by image_search(); populated by calling load()
# when this module is executed.
text_encoder, tokenizer, df, image_embeddings = load()


def get_html(url_list, height=224):
    """Render image results as a flex-wrapped HTML gallery.

    Args:
        url_list: Iterable of ``(url, link)`` pairs. ``url`` is the image
            source. ``link`` is an optional target the image is wrapped in;
            when it is empty or not a string (such as ``None`` or a float
            NaN) the image is emitted without an anchor.
        height: Image height in pixels.

    Returns:
        One HTML string: a ``<div>`` containing every image in order.
        ``url`` and ``link`` are HTML-escaped before being interpolated.
    """
    parts = ["<div style='margin-top: 20px; max-width: 1200px; display: flex; flex-wrap: wrap; justify-content: space-evenly'>"]
    for url, link in url_list:
        tag = f"<img style='height: {height}px; margin: 5px' src='{escape(url)}'>"
        # Only string links can be escaped and used as an href; anything
        # else (missing values) falls back to a plain image.
        if isinstance(link, str) and link:
            tag = f"<a href='{escape(link)}' target='_blank'>{tag}</a>"
        parts.append(tag)
    parts.append("</div>")
    return "".join(parts)


# NOTE(review): intentionally left undecorated. A bare `st.cache(...)`
# statement above a def (without `@`) applies no caching, and enabling
# st.cache here would presumably need hash_funcs for the module-level
# model/tokenizer this function reads -- confirm before adding it.
def image_search(query, top_k=8):
    """Return the images whose embeddings are most similar to ``query``.

    Args:
        query: Text passed to the module-level ``tokenizer`` and
            ``text_encoder``.
        top_k: Maximum number of results to return.

    Returns:
        A list of ``(path, link)`` tuples taken from the ``'path'`` and
        ``'link'`` columns of ``df``, ordered from highest to lowest cosine
        similarity.
    """
    with torch.no_grad():
        tokens = tokenizer(query, return_tensors='pt')
        text_embedding = text_encoder(**tokens).pooler_output
    # torch.cosine_similarity only accepts tensors, while load() returns the
    # embeddings straight from np.load; as_tensor also aligns the dtype.
    # assumes image_embeddings is (num_images, dim) -- TODO confirm
    image_tensor = torch.as_tensor(image_embeddings, dtype=text_embedding.dtype)
    similarity = torch.cosine_similarity(text_embedding, image_tensor)
    # Plain Python ints keep pandas positional indexing unambiguous.
    best = similarity.argsort(descending=True)[:top_k].tolist()
    rows = df.iloc[best]
    return list(zip(rows['path'], rows['link']))


# Markdown text that main() renders in the sidebar.
description = '''
# Semantic image search :)
'''


def main():
    """Build the page: styles, sidebar text, search box and result gallery.

    Injects the page CSS, renders ``description`` in the sidebar, and places
    a text input in the middle of three columns weighted 1:3:1. When the
    input is non-empty, the query is passed to ``image_search`` and the
    results are rendered through ``get_html``.
    """
    page_css = '''
              <style>
              .block-container{
                max-width: 1200px;
              }
              div.row-widget.stRadio > div{
                flex-direction:row;
                display: flex;
                justify-content: center;
              }
              div.row-widget.stRadio > div > label{
                margin-left: 5px;
                margin-right: 5px;
              }
              section.main>div:first-child {
                padding-top: 0px;
              }
              section:not(.main)>div:first-child {
                padding-top: 30px;
              }
              div.reportview-container > section:first-child{
                max-width: 320px;
              }
              #MainMenu {
                visibility: hidden;
              }
              footer {
                visibility: hidden;
              }
              </style>'''
    st.markdown(page_css, unsafe_allow_html=True)
    st.sidebar.markdown(description)

    # Only the middle column is used; the outer two act as margins.
    _left, center, _right = st.columns((1, 3, 1))
    query = center.text_input('', value='clouds at sunset')
    if not query:
        return

    matches = image_search(query)
    gallery = get_html(matches)
    st.markdown(gallery, unsafe_allow_html=True)


# Build the page only when this file is executed as the main script.
if __name__ == '__main__':
    main()