Spaces:
Sleeping
Sleeping
File size: 3,165 Bytes
e8a2d7c c644909 3db21cd 78f153b 3db21cd 73f55cc 4352744 6eb3e9b 73f55cc 4352744 16d4b86 e8a2d7c 4352744 6eb3e9b 4352744 6eb3e9b 0a0ba7a 8a0b589 0a0ba7a 3db21cd 0a0ba7a 6eb3e9b e8a2d7c 6c5550d e8a2d7c 6c5550d bf8b184 e8a2d7c e374794 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the model and tokenizer once at import time (module-level globals
# used by both conversion functions below).
# The commented-out pairs document model experiments and their outcomes:
# gpt2 outputs text!
#tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
#model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
# distilgpt2 is only 80MB -> NOK, no emojis
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")
# tiny-gpt2 is only 20MB -> NOK, no emojis
#tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
#model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
# TinyLlama
#tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
#model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
# Legacy conversion method (superseded by text_to_emoji below).
def text_to_emoji_OLD(input_text):
    """Translate *input_text* into emojis via a plain instruction prompt.

    Strips basic punctuation, asks the model directly for an emoji
    translation, samples a short continuation, and returns whatever the
    model produced after the prompt.
    """
    # Remove basic punctuation so the model sees a clean sentence.
    stripped = re.sub(r"[.,!?;:]", "", input_text)
    # Direct-instruction prompt (no few-shot examples).
    prompt = f'Convert the following sentence into an emoji-sequence which conveys a similar meaning and return only the emojis, no explanation:\n\n"{stripped}"\n\n'
    # Encode the prompt and sample a short continuation.
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(
        **encoded,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.8,
        top_k=50,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Everything past the original prompt text is the model's answer.
    return decoded[len(prompt):].strip()
# Conversion method: few-shot, pattern-based prompting.
def text_to_emoji(input_text):
    """Map *input_text* to an emoji sequence using few-shot examples.

    Builds a prompt of "sentence β emojis" example lines, lets the model
    continue the pattern for the cleaned input, and returns only the first
    line generated after the final arrow.

    NOTE(review): the arrow/emoji characters below appear mojibake-encoded
    in this copy of the source, and the "Merry Christmas" example was split
    mid-literal by extraction — verify these strings against the original
    file before relying on their exact bytes.
    """
    # Strip basic punctuation (optional clean-up).
    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)
    # Pure pattern-based prompt: examples teach the "text β emoji" mapping.
    prompt = (
        "Hi there β ππ\n"
        "Good night β ππ΄\n"
        "I love pizza β β€οΈπ\n"
        "It's raining β π§οΈβ\n"
        "Happy birthday β πππ₯³\n"
        "I am so tired β π΄π€\n"
        "Letβs go to the beach β ποΈππ\n"
        "Iβm feeling lucky β ππ€\n"
        "Weβre getting married β ππ°π€΅\n"
        "Merry Christmas β πππ\n"
        "Letβs party β ππΊπ\n"
        f"{cleaned_text} β"
    )
    # Tokenize and generate a short continuation of the pattern.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id  # Prevents warning
    )
    # Decode the full sequence (prompt + continuation) back to text.
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the generated part after the last arrow, first line only
    # (the model may keep generating further example lines).
    emoji_part = generated_text.split("β")[-1].strip().split("\n")[0]
    return emoji_part
# --- Gradio front-end --------------------------------------------------
# A simple textbox-in / text-out demo wired to the few-shot converter.
sentence_box = gr.Textbox(lines=2, placeholder="Enter a sentence...")
iface = gr.Interface(
    fn=text_to_emoji,
    inputs=sentence_box,
    outputs="text",
    title="AI-Powered Emoji Translator",
    description="Enter a sentence, and the AI will transform it into an emoji-version π₯³",
)
iface.launch()