File size: 1,493 Bytes
d9ac5ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import emoji_data_python
import pickle
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
import numpy as np
# Sentence-embedding model shared by the cache-building code below and by
# every query made through closest_emoji().
model = SentenceTransformer('all-mpnet-base-v2')

# Load previously computed (unified code, embedding) pairs, if a cache exists.
# NOTE(security): pickle can execute arbitrary code while loading. Only use a
# cache file that this script wrote itself; never load one from an untrusted
# source.
try:
    with open('embeddings_list.pkl', 'rb') as f:
        embeddings_list = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError,
        AttributeError, ImportError, IndexError):
    # Missing, unreadable or corrupt cache: start empty and recompute below.
    # (A bare ``except:`` would also have swallowed KeyboardInterrupt and
    # SystemExit.)
    embeddings_list = []

# Build the set of already-cached codes once, so that each membership test is
# O(1) instead of rebuilding and scanning a list for every emoji.
_cached_codes = {entry[0] for entry in embeddings_list}
emojis_to_compute = [
    e for e in emoji_data_python.emoji_data if e.unified not in _cached_codes
]

if emojis_to_compute:
    for e in tqdm(emojis_to_compute, desc='Computing embeddings'):
        # One embedding per textual label of the emoji: every short name
        # (underscores turned into spaces) plus the lower-cased full name.
        strings = [n.replace('_', ' ').strip() for n in e.short_names] + [e.name.lower()]
        for s in strings:
            embedding = model.encode(s)
            embeddings_list.append((e.unified, embedding))
    # Persist the extended list so later runs skip the emojis handled here.
    with open('embeddings_list.pkl', 'wb') as f:
        pickle.dump(embeddings_list, f)
def closest_emoji(text: str) -> str:
    """Return the emoji whose label embedding is nearest to that of *text*.

    The text is encoded with the module-level ``model`` and compared, by
    Euclidean distance, against every ``(unified code, embedding)`` pair in
    the module-level ``embeddings_list``. When several entries are equally
    close, the earliest one in the list wins.

    Args:
        text: Free-form text to match against the emoji labels.

    Returns:
        The character(s) for the closest entry, or an empty string when no
        entry could be selected (for example when ``embeddings_list`` is
        empty), instead of passing ``None`` to ``unified_to_char``.
    """
    text_embedding = model.encode(text)
    # Local names deliberately differ from the function name to avoid
    # shadowing it inside its own body.
    best_code = None
    best_distance = np.inf
    for code, emoji_embedding in embeddings_list:
        distance = np.linalg.norm(text_embedding - emoji_embedding)
        if distance < best_distance:
            best_distance = distance
            best_code = code
    if best_code is None:
        # Nothing to compare against (or no finite distance was found).
        return ''
    return emoji_data_python.unified_to_char(best_code)
import gradio as gr

# The ``gr.inputs`` / ``gr.outputs`` namespaces were deprecated in Gradio 3 and
# removed in Gradio 4. Prefer the unified ``gr.Textbox`` component when the
# installed release provides it, and fall back to the legacy namespaces only on
# older releases, so the app starts on either.
if hasattr(gr, 'Textbox'):
    emoji_input = gr.Textbox(label='text in')
    emoji_output = gr.Textbox(label='emoji out')
else:
    emoji_input = gr.inputs.Textbox(label='text in')
    emoji_output = gr.outputs.Textbox(label='emoji out')

# Single text box in, single text box out, backed by closest_emoji().
iface = gr.Interface(fn=closest_emoji, inputs=emoji_input, outputs=emoji_output,
                     title='text to emoji')
# Starts the web UI as soon as this script is executed.
iface.launch()