|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st |
|
import numpy as np |
|
import pickle |
|
from typing import Dict, List, Any |
|
import random |
|
from sentence_transformers import SentenceTransformer |
|
from qdrant_client import models, QdrantClient |
|
import emoji as em |
|
import warnings |
|
|
|
# Install a blanket "ignore" filter so Python warnings raised while the app
# runs are not shown.
warnings.filterwarnings("ignore")
|
|
|
|
|
@st.cache_data(show_spinner=False)
def load_dictionary(file_path: str) -> Dict[str, Dict[str, Any]]:
    """
    Read the pickled emoji dictionary stored at ``file_path``.

    The call is wrapped in ``st.cache_data`` with the spinner disabled.

    Args:
        file_path: Path of the pickle file to open in binary mode.

    Returns:
        Whatever object the pickle file contains; the annotation expects a
        mapping from emoji to a metadata dictionary.
    """
    # NOTE(review): unpickling executes arbitrary code from the file, so the
    # path must only ever point at a trusted, app-owned asset.
    with open(file_path, mode='rb') as handle:
        return pickle.load(handle)
|
|
|
|
|
|
|
@st.cache_resource(show_spinner=False)
def load_encoder(model_name: str) -> SentenceTransformer:
    """
    Instantiate the sentence encoder identified by ``model_name``.

    The call is wrapped in ``st.cache_resource`` with the spinner disabled.

    Args:
        model_name: Model identifier handed to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name)
|
|
|
|
|
|
|
@st.cache_resource(show_spinner=False)
def load_qdrant_client(emoji_dict: Dict[str, Dict[str, Any]]) -> QdrantClient:
    """
    Create an in-memory Qdrant client and fill it with the emoji embeddings.

    A collection named "EMOJIS" is created with cosine distance and one point
    per entry of ``emoji_dict``. Each point's vector is the entry's
    ``'embedding'`` value and its payload is the rest of the entry's metadata.

    Unlike the previous implementation, ``emoji_dict`` is left untouched: the
    payloads are built as copies without the ``'embedding'`` key instead of
    deleting that key from the caller's dictionaries.

    Args:
        emoji_dict: Mapping from emoji to its metadata dictionary. Every
            metadata dictionary must contain an ``'embedding'`` entry.

    Returns:
        The populated ``QdrantClient``.

    Raises:
        ValueError: If ``emoji_dict`` is empty, because the vector size of
            the collection is derived from the first embedding.
        KeyError: If a metadata dictionary has no ``'embedding'`` entry.
    """
    if not emoji_dict:
        raise ValueError("emoji_dict must contain at least one emoji entry.")

    vectors = {
        emoji: np.array(metadata['embedding'])
        for emoji, metadata in emoji_dict.items()
    }

    # The embedding is stored as the point's vector, so it is left out of the
    # payload. Building new dictionaries keeps the caller's data intact.
    payloads = {
        emoji: {
            key: value
            for key, value in metadata.items()
            if key != 'embedding'
        }
        for emoji, metadata in emoji_dict.items()
    }

    # All vectors are assumed to share the first one's length; that length
    # is used as the collection's vector size.
    embedding_dim = next(iter(vectors.values())).shape[0]

    vector_DB_client = QdrantClient(":memory:")

    vector_DB_client.create_collection(
        collection_name="EMOJIS",
        vectors_config=models.VectorParams(
            size=embedding_dim,
            distance=models.Distance.COSINE,
        ),
    )

    vector_DB_client.upload_points(
        collection_name="EMOJIS",
        points=[
            models.PointStruct(
                id=idx,
                vector=vectors[emoji].tolist(),
                payload=payloads[emoji],
            )
            for idx, emoji in enumerate(emoji_dict)
        ],
    )

    return vector_DB_client
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def retrieve_relevant_emojis(
        embedding_model: SentenceTransformer,
        vector_DB_client: QdrantClient,
        query: str,
        limit: int = 50) -> List[str]:
    """
    Return the emojis whose stored vectors are closest to the query.

    The query is encoded with ``embedding_model`` and the resulting vector is
    searched against the "EMOJIS" collection of ``vector_DB_client``.

    Args:
        embedding_model: Encoder whose ``encode(query)`` result exposes
            ``tolist()``.
        vector_DB_client: Client holding the "EMOJIS" collection.
        query: Text to search for.
        limit: Maximum number of hits requested from the search. Defaults to
            50, the value that used to be hard-coded.

    Returns:
        The ``'Emoji'`` payload value of every hit, in the order the search
        returned them, with repeated emojis removed.
    """
    query_vector = embedding_model.encode(query).tolist()

    # NOTE(review): recent qdrant-client releases appear to deprecate
    # ``search`` in favour of ``query_points`` -- confirm against the pinned
    # client version before upgrading.
    hits = vector_DB_client.search(
        collection_name="EMOJIS",
        query_vector=query_vector,
        limit=limit,
    )

    # dict keys keep insertion order, so this drops repeats while preserving
    # the ranking, without rescanning a list for every hit.
    return list(dict.fromkeys(hit.payload['Emoji'] for hit in hits))
|
|
|
|
|
def render_results(
        embedding_model: SentenceTransformer,
        vector_DB_client: QdrantClient,
        query: str,
        emojis_to_render: List[str] = None,) -> None:
    """
    Show the emojis matching ``query`` in the Streamlit page.

    Args:
        embedding_model: Encoder forwarded to ``retrieve_relevant_emojis``.
        vector_DB_client: Client forwarded to ``retrieve_relevant_emojis``.
        query: Text to search for.
        emojis_to_render: Emojis to display. When ``None``, they are looked
            up with ``retrieve_relevant_emojis``.
    """
    if emojis_to_render is None:
        emojis_to_render = retrieve_relevant_emojis(
            embedding_model, vector_DB_client, query
        )

    # Nothing to display: report it and stop.
    if not emojis_to_render:
        st.error("No results found.")
        return

    # All emojis go into a single large heading, separated by tabs.
    joined_emojis = '\t'.join(emojis_to_render)
    st.markdown(
        f'<h1 style="font-size: 60px">{joined_emojis}</h1>',
        unsafe_allow_html=True,
    )
|
|
|
def main() -> None:
    """
    Build and run the Emojeez Streamlit page.

    Loads the sentence encoder, the pickled emoji dictionary and the
    in-memory Qdrant collection, then draws the title and description, the
    query box with its search button, the search results (or an error
    message), and the footer.
    """
    # NOTE(review): several user-facing literals below (title, description,
    # example caption, button label, footer) contain characters that look
    # like mis-decoded emoji. They are kept exactly as found because the
    # intended characters cannot be recovered from this file -- confirm
    # against the original source and restore them.

    example_queries = [
        "Extraterrestrial form",
        "Exploration & discovery",
        "Happy birthday",
        "Love and peace",
        "Beyond the stars",
        "Great ambition",
        "Career growth",
        "Flightless bird",
        "Tropical vibes",
        "Gift of nature",
        "In the ocean ",
        "Spring awakening",
        "Autumn vibes",
        "In the garden",
        "In the desert",
        "Heart gesture",
        "Love is in the air",
        "In the mountains",
        "Extinct species",
        "Wonderful world",
        "Cool vibes",
        "Warm feelings",
        "Academic excellence",
        "Artistic expression",
        "Urban life",
        "Rural life",
        "Sign language",
        "Global communication",
        "International cooperation",
        "Worldwide connection",
        "Digital transformation",
        "AI-powered solutions",
        "New beginnings",
        "Innovation & creativity",
        "Scientific discovery",
        "Space exploration",
        "Sustainable development",
        "Climate change",
        "Environmental protection",
        "Healthy lifestyle",
        "Mental health",
        "Healthy food",
        "Healthy habits",
        "Fitness & wellness",
        "Mindfulness & meditation",
        "Emotional intelligence",
        "Personal growth",
        "Financial freedom",
        "Investment opportunities",
        "Economic growth",
        "Traditional crafts",
        "Folk music",
        "Cultural shock",
        "Illuminating thoughts",
    ]

    model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
    sentence_encoder = load_encoder(model_name)

    # NOTE(review): absolute, deployment-specific path -- confirm it matches
    # the environment the app runs in.
    embedding_dict = load_dictionary('/home/user/app/src/emoji_embeddings_dict.pkl')
    vector_DB_client = load_qdrant_client(embedding_dict)

    st.title("Emojeez π ")

    app_description = """
AI-powered, multilingual semantic search for emojis in 50+ languages π
"""
    app_example = """
β¨οΈ For example, type β hit the target β or β illuminating β below
"""
    st.text(app_description)

    with st.container(border=True):
        # Store a random example query in the session the first time only.
        # Nothing in this function reads ``input_text`` back.
        if 'input_text' not in st.session_state:
            st.session_state.input_text = random.choice(example_queries)

        instr = 'Enter your text query here ...'

        st.caption(app_example)

        col1, col2 = st.columns([3.5, 1])

        with col1:
            query = st.text_input(
                instr,
                value="",
                placeholder=instr,
                label_visibility='collapsed',
                help="exploration discovery",
                # Remember that the text box was changed so the search below
                # also runs without a button click. The flag is never reset
                # in this function.
                on_change=lambda: st.session_state.update({
                    'enter_clicked': True
                }),
            )

        with col2:
            trigger_search = st.button(
                label="β¨ Find emojis!",
                use_container_width=True,
            )

        # NOTE(review): the file's indentation was lost, so rendering the
        # results inside the bordered container is a reconstruction --
        # confirm the intended layout.
        if trigger_search or (st.session_state.get('enter_clicked') and query):
            if query:
                render_results(
                    sentence_encoder,
                    vector_DB_client,
                    query,
                )
            else:
                # Only reachable when the button was clicked with an empty
                # text box.
                st.error("Please enter a query of a few keywords to search!")

    footer = """
<style>
.footer {
position: relative;
left: 0;
bottom: 0;
width: 100%;
background-color: transparent;
color: gray;
text-align: center;
padding: 10px;
font-size: 16px;
}
.streamlit-container {
margin-bottom: 10px; /* Adjust this value based on your footer height */
}
</style>
<div class="footer">
Developed with π by <a href="https://badrex.github.io/" target="_blank">Badr Alabsi</a> <br />
π   <a href="https://medium.com/p/f85a36a86f21" target="_blank">Blog Post</a>   |  
π   <a href="https://github.com/badrex/emojeez/blob/main/LANGUAGES" target="_blank">Supported Languages</a>   |  
π   <a href="https://github.com/badrex/emojeez" target="_blank">Code on GitHub</a>
</div>
"""

    footer_column = st.columns(1)
    with footer_column[0]:
        st.markdown(footer, unsafe_allow_html=True)
|
|
|
|
|
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|
|
|
|