ai01firebird committed on
Commit 0abc7a9 · verified · 1 Parent(s): 8634e34

switch to emojinator-gpt2-v3

Files changed (1)
app.py +34 -7
app.py CHANGED
@@ -5,7 +5,11 @@ import torch
 
 # Modell und Tokenizer laden
 HF_USER = "ai01firebird"
-MODEL_NAME = "emojinator-gpt2"
+MODEL_NAME = "emojinator-gpt2-v3"
+
+# fine-tuned
+model = AutoModelForCausalLM.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
+tokenizer = AutoTokenizer.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
 
 # gpt2 outputs text!
 #tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
@@ -15,10 +19,6 @@ MODEL_NAME = "emojinator-gpt2"
 #tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
 #model = AutoModelForCausalLM.from_pretrained("distilgpt2")
 
-# fine-tuned
-model = AutoModelForCausalLM.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
-tokenizer = AutoTokenizer.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
-
 # tiny-gpt2 is only 20MB -> NOK, no emojis
 #tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
 #model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
@@ -28,7 +28,7 @@ tokenizer = AutoTokenizer.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
 #model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 
 # OLD conversion method
-def text_to_emoji_OLD(input_text):
+def text_to_emoji_OLD_OLD(input_text):
     # Eingabetext bereinigen (optional)
     cleaned_text = re.sub(r"[.,!?;:]", "", input_text)
 
@@ -54,7 +54,7 @@ def text_to_emoji_OLD(input_text):
     return emoji_part
 
 # conversion method
-def text_to_emoji(input_text):
+def text_to_emoji_OLD(input_text):
     # Eingabetext bereinigen (optional)
     cleaned_text = re.sub(r"[.,!?;:]", "", input_text)
 
@@ -73,6 +73,7 @@ def text_to_emoji(input_text):
         "Let’s party → 🎉🕺💃\n"
         f"{cleaned_text} →"
     )
+    prompt = f"Text: {input_text}\nEmoji:"
 
     # Tokenisierung und Generation
     inputs = tokenizer(prompt, return_tensors="pt")
@@ -93,6 +94,32 @@ def text_to_emoji(input_text):
 
     return emoji_part
 
+# conversion method
+def text_to_emoji(input_text):
+    # Eingabetext bereinigen (optional)
+    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)
+
+    prompt = f"Text: {cleaned_text}\nEmoji:"
+
+    # Tokenisierung und Generation
+    inputs = tokenizer(prompt, return_tensors="pt")
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=10,
+        do_sample=True,
+        temperature=0.9,
+        top_k=50,
+        pad_token_id=tokenizer.eos_token_id  # Prevents warning
+    )
+
+    # Decodieren
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Nur den generierten Teil nach dem letzten "→"
+    emoji_part = generated_text.split("→")[-1].strip().split("\n")[0]
+
+    return emoji_part
+
 # Gradio UI
 iface = gr.Interface(
     fn=text_to_emoji,
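
For reference, the generation path added in this commit reduces to the standalone sketch below, assuming the ai01firebird/emojinator-gpt2-v3 checkpoint can be loaded from the Hub. The helper name emojify and the split on the "Emoji:" marker are illustrative choices made here, not part of the committed app.py, which still splits the decoded text on "→".

import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "ai01firebird/emojinator-gpt2-v3"  # checkpoint referenced by this commit
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

def emojify(text: str) -> str:
    # Strip punctuation, mirroring the cleaning step in app.py
    cleaned = re.sub(r"[.,!?;:]", "", text)
    # New prompt format used by the fine-tuned v3 model
    prompt = f"Text: {cleaned}\nEmoji:"
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=True,
            temperature=0.9,
            top_k=50,
            pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only what follows the "Emoji:" marker (assumption; app.py splits on "→")
    return decoded.split("Emoji:")[-1].strip().split("\n")[0]

print(emojify("Let's grab a coffee"))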