ai01firebird committed on
Commit
0a0ba7a
·
verified ·
1 Parent(s): 95f880a

extract emojis

Browse files
Files changed (1) hide show
  1. app.py +23 -13
app.py CHANGED
@@ -18,21 +18,31 @@ tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
18
  model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
19
 
20
  # conversion method
21
- def text_to_emoji(text):
22
- # remove special characters
23
- text_cleaned = re.sub(r"[.,!?;:]", "", text)
24
- prompt = f"Convert the following sentence into an emoji-sequence which conveys a similar meaning and return only the emojis, no explanation:\n\n\"{text_cleaned}\""
25
 
26
- # Tokenisieren
 
 
 
27
  inputs = tokenizer(prompt, return_tensors="pt")
28
-
29
- # Antwort generieren
30
- outputs = model.generate(**inputs, max_new_tokens=25, do_sample=True, temperature=0.7)
31
-
32
- # Antwort decodieren
33
- result = tokenizer.decode(outputs[0], skip_special_tokens=True)
34
-
35
- return result
 
 
 
 
 
 
 
 
36
 
37
  # Gradio UI
38
  iface = gr.Interface(
 
18
  model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
19
 
20
  # conversion method
21
+ def text_to_emoji(input_text):
22
+ # Eingabetext bereinigen (optional)
23
+ cleaned_text = re.sub(r"[.,!?;:]", "", input_text)
 
24
 
25
+ # Prompt vorbereiten
26
+ prompt = f'Convert the following sentence into an emoji-sequence which conveys a similar meaning and return only the emojis, no explanation:\n\n"{cleaned_text}"\n\n'
27
+
28
+ # Tokenisierung und Generation
29
  inputs = tokenizer(prompt, return_tensors="pt")
30
+ outputs = model.generate(
31
+ **inputs,
32
+ max_new_tokens=30,
33
+ do_sample=True,
34
+ temperature=0.8,
35
+ top_k=50
36
+ )
37
+
38
+ # Decodieren
39
+ generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
40
+
41
+ # Nur den Teil nach dem Prompt extrahieren
42
+ emoji_part = generated_text[len(prompt):].strip()
43
+
44
+ return emoji_part
45
+
46
 
47
  # Gradio UI
48
  iface = gr.Interface(