"""Streamlit app: Persian (fa) text-to-image search over precomputed embeddings."""
from html import escape

import numpy as np
import streamlit as st
import torch
from transformers import AutoTokenizer, RobertaModel


@st.cache(show_spinner=False,
          hash_funcs={AutoTokenizer: lambda _: None,
                      RobertaModel: lambda _: None,
                      dict: lambda _: None})
def load():
    """Load the tokenizer, text encoder, image links and image embeddings.

    Returns:
        A tuple ``(tokenizer, text_encoder, links, image_embeddings)``.
    """
    tokenizer = AutoTokenizer.from_pretrained('SajjadAyoubi/clip-fa-text')
    # .eval() puts the encoder in inference mode.
    text_encoder = RobertaModel.from_pretrained('SajjadAyoubi/clip-fa-text').eval()
    # NOTE(security): torch.load and np.load(allow_pickle=True) both unpickle
    # the file contents; only ship 'embedding.pt' / 'data.npy' from a trusted
    # source.
    image_embeddings = torch.load('embedding.pt')
    links = np.load('data.npy', allow_pickle=True)
    return tokenizer, text_encoder, links, image_embeddings


tokenizer, text_encoder, links, image_embeddings = load()


def get_html(url_list):
    """Render image URLs as one HTML gallery fragment.

    Args:
        url_list: Iterable of image URLs.

    Returns:
        A string holding a wrapping ``<div>`` with one ``<img>`` per URL.
        Every URL is HTML-escaped before being placed in the ``src`` attribute.
    """
    # NOTE(review): the original markup inside these literals was lost (only
    # blank multi-line strings remained, which do not parse). The tags and
    # inline styles below are a reconstruction -- confirm against the intended
    # layout.
    images = "".join(
        f"<img style='height: 180px; margin: 2px' src='{escape(str(url))}'>"
        for url in url_list
    )
    return (
        "<div style='margin-top: 20px; display: flex; flex-wrap: wrap; "
        "justify-content: space-evenly'>"
        f"{images}"
        "</div>"
    )


# NOTE(review): a bare ``st.cache(show_spinner=False)`` expression (no ``@``)
# used to sit here and had no effect. It was not turned into a decorator
# because legacy ``st.cache`` also hashes the globals the function reads
# (the model and embeddings here) -- decide deliberately before caching.
def image_search(query, top_k=10):
    """Return the links of the images that best match a text query.

    Args:
        query: Search text passed to the tokenizer.
        top_k: Maximum number of links to return.

    Returns:
        A list of up to ``top_k`` entries of ``links``, best match first.
    """
    with torch.no_grad():
        text_embedding = text_encoder(
            **tokenizer(query, return_tensors='pt')
        ).pooler_output
        # mm yields an (N, 1) column. Sorting that along its last dimension
        # (size 1) would give index 0 for every row, so flatten to (N,) first.
        scores = torch.mm(image_embeddings, text_embedding.T).squeeze(1)
    ranked = scores.argsort(descending=True)
    return [links[i] for i in ranked[:top_k].tolist()]


description = '''
# Persian (fa) image search
- Enter your query and hit enter

Built with [CLIP-fa](https://github.com/sajjjadayobi/CLIPfa) model and 25k images from [Unsplash](https://unsplash.com/)
'''


def main():
    """Draw the page: sidebar description, search box and result gallery."""
    # NOTE(review): this HTML block is whitespace-only; presumably page-level
    # markup/CSS used to live here -- confirm and restore if needed.
    st.markdown(''' ''', unsafe_allow_html=True)
    st.sidebar.markdown(description)
    # Centre the search box by using only the middle column of a 1:3:1 split.
    _, c, _ = st.columns((1, 3, 1))
    query = c.text_input('Search Box (type in fa)', value='گل صورتی')
    if query:
        results = image_search(query)
        st.markdown(get_html(results), unsafe_allow_html=True)


if __name__ == '__main__':
    main()