import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer

# gpt2 outputs plain text, not emojis!
#tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
#model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

# distilgpt2 is only 80MB -> NOK, no emojis
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# tiny-gpt2 is only 20MB -> NOK, no emojis
#tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
#model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

# TinyLlama
#tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
#model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# OLD conversion method
def text_to_emoji_OLD(input_text):
    # Clean up the input text (optional)
    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)

    # Prepare the prompt
    prompt = f'Convert the following sentence into an emoji sequence that conveys a similar meaning, and return only the emojis, no explanation:\n\n"{cleaned_text}"\n\n'

    # Tokenize and generate
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=30,   # cap the length of the continuation
        do_sample=True,      # sample instead of greedy decoding
        temperature=0.8,     # soften the distribution for more variety
        top_k=50             # sample only from the 50 most likely tokens
    )

    # Decode
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the part after the prompt
    emoji_part = generated_text[len(prompt):].strip()

    return emoji_part

# conversion method
def text_to_emoji(input_text):
    # Clean up the input text (optional)
    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)

    # Pure pattern-based prompt
    prompt = (
        "Hi there → 👋🙂\n"
        "Good night → 🌙😴\n"
        "I love pizza → ❤️🍕\n"
        "It's raining → 🌧️☔\n"
        "Happy birthday → 🎉🎂🥳\n"
        "I am so tired → 😴💤\n"
        "Let’s go to the beach → 🏖️🌊😎\n"
        "I’m feeling lucky → 🍀🤞\n"
        "We’re getting married → 💍👰🤵\n"
        "Merry Christmas → 🎄🎁🎅\n"
        "Let’s party → 🎉🕺💃\n"
        f"{cleaned_text} →"
    )
    
    # Tokenize and generate
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,   # a short emoji sequence is enough
        do_sample=True,      # sample instead of greedy decoding
        temperature=0.9,     # slightly more randomness than the old method
        top_k=50,            # sample only from the 50 most likely tokens
        pad_token_id=tokenizer.eos_token_id  # prevents the missing-pad-token warning
    )

    # Decode
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the generated part after the last "→"
    emoji_part = generated_text.split("→")[-1].strip().split("\n")[0]

    return emoji_part
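
# Hedged sketch (an addition, not the author's code): distilgpt2 often emits
# plain words rather than emojis (see the "NOK, no emojis" notes above), so a
# post-filter that keeps only emoji codepoints could clean up the output. The
# ranges below are a rough approximation, not an exhaustive emoji list.
def keep_only_emojis(text):
    emoji_pattern = re.compile(
        "["
        "\U0001F300-\U0001FAFF"  # pictographs, emoticons, supplemental symbols
        "\U00002600-\U000027BF"  # misc symbols and dingbats
        "\U0001F1E6-\U0001F1FF"  # regional indicators (flag pairs)
        "\uFE0F"                 # variation selector-16 (emoji presentation)
        "]+"
    )
    # Example: keep_only_emojis("sure! 🎉🎂 haha") -> "🎉🎂"
    return "".join(emoji_pattern.findall(text))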

# Gradio UI
iface = gr.Interface(
    fn=text_to_emoji,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),
    outputs="text",
    title="AI-Powered Emoji Translator",
    description="Enter a sentence, and the AI will transform it into an emoji version 🥳"
)

iface.launch()
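
# Note: passing share=True to iface.launch() (a standard Gradio option) would
# also create a temporary public link for the demo.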