WICKED4950 committed on
Commit fe0a282 · verified · 1 Parent(s): afe2bef

Update app.py

Files changed (1)
  1. app.py +29 -25
app.py CHANGED
@@ -1,83 +1,87 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
+ from huggingface_hub import InferenceClient, login
  import tensorflow as tf
- from huggingface_hub import login, create_repo, upload_file
- import os
  from transformers import AutoTokenizer, TFAutoModelForCausalLM
+ import os
+
+ # Set up mixed precision and distribution strategy
  policy = tf.keras.mixed_precision.Policy('mixed_bfloat16')
  tf.keras.mixed_precision.set_global_policy(policy)
  strategy = tf.distribute.MultiWorkerMirroredStrategy()

+ # Log into Hugging Face
  login(os.environ.get("hf_token"))
+
+ # Load tokenizer and model
  name = "WICKED4950/GPT2mini-InstEsther0.5eV3.2"
  tokenizer = AutoTokenizer.from_pretrained(name)
  tokenizer.pad_token = tokenizer.eos_token
+
  with strategy.scope():
      model = TFAutoModelForCausalLM.from_pretrained(name)

+ # Raw Prediction Function
  def raw_pred(input, model, tokenizer, max_length=1024, temperature=0.2):
      input_ids = tokenizer.encode(input, return_tensors='tf')

      # Initialize variables
      generated_ids = input_ids
-     stop_token_id = tokenizer.encode("<|SOH|>", add_special_tokens=False)[0] # ID for <|SOH|>
+     stop_token_id = tokenizer.encode("<|SOH|>", add_special_tokens=False)[0]
      all_generated_tokens = [] # To store generated token IDs
      tokens_yielded = [] # To store tokens as they are yielded

      with strategy.scope():
          for _ in range(max_length // 1): # Generate in chunks of 3 tokens
-             # Generate three tokens at a time
+             # Generate tokens
              outputs = model.generate(
                  generated_ids,
-                 max_length=generated_ids.shape[1] + 1, # Increment max length by 3
+                 max_length=generated_ids.shape[1] + 1,
                  temperature=temperature,
                  pad_token_id=tokenizer.eos_token_id,
-                 eos_token_id=stop_token_id, # Stop generation at <|SOH|>
+                 eos_token_id=stop_token_id,
                  do_sample=True,
                  num_return_sequences=1
              )

-             # Get the newly generated tokens (last 3 tokens)
+             # Get the newly generated tokens
              new_tokens = outputs[0, -1:]
              generated_ids = outputs # Update the generated_ids with the new tokens

-             # Store the generated tokens as numbers (IDs)
+             # Store and yield the generated tokens
              all_generated_tokens.extend(new_tokens.numpy().tolist())
-
-             # Decode and yield the tokens as they are generated (as numbers)
              tokens_text = tokenizer.decode(new_tokens, skip_special_tokens=False)
              tokens_yielded.append(tokens_text)
              yield tokens_text

-             # Stop if the generated tokens include <|SOH|>
+             # Stop if stop token is encountered
              if stop_token_id in new_tokens.numpy():
                  final_text = tokenizer.decode(all_generated_tokens, skip_special_tokens=False)
-                 yield ("<|Clean|>" + final_text)
+                 yield "<|Clean|>" + final_text
                  break

+ # Response Handler Function
  def respond(message, history):
-     # Prepare input for the model
      give_mod = ""
-     history = history[-3:]
+     history = history[-3:] # Limit history to last 3 exchanges
      for chunk in history:
-         give_mod = give_mod + "<|SOH|>" + chunk[0] + "<|SOB|>" + chunk[1]
-     give_mod = give_mod + "<|SOH|>" + message.capitalize() + "<|SOB|>"
-     print(give_mod)
+         give_mod += f"<|SOH|>{chunk[0]}<|SOB|>{chunk[1]}"
+     give_mod += f"<|SOH|>{message.capitalize()}<|SOB|>"
+
      response = ""
      for token in raw_pred(give_mod, model, tokenizer):
          if "<|Clean|>" in token:
              response = token
          else:
              response += token
-         yield response.replace("<|SOH|>","").replace("<|Clean|>","")
-     print(response)
+         yield response.replace("<|SOH|>", "").replace("<|Clean|>", "")
+
  # Gradio Chat Interface Setup
  demo = gr.ChatInterface(
-     fn=respond, # Response handler function
-     title="Chat with Esther", # Add a title
-     description="A friendly chatbot ready to help and chat with you! 😊", # Brief description
-     theme="compact", # Options: "compact", "default", "dark"
+     fn=respond,
+     title="Chat with Esther", # Title of the app
+     description="A friendly chatbot ready to help and chat with you! 😊", # Description of the app
+     theme="compact", # Choose the theme
  )

  if __name__ == "__main__":
-     demo.launch()
+     demo.launch(debug=True)
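
For reference, a minimal sketch (not part of the commit) of how the updated respond handler streams a reply outside of Gradio. It assumes the tuple-style (user, bot) history that gr.ChatInterface passes to its fn by default; the messages below are hypothetical.

    # Hypothetical smoke test (assumed usage; messages are illustrative only)
    history = [("Hi", "Hello! How can I help you today?")]  # one earlier (user, bot) exchange
    final = ""
    for partial in respond("how are you doing", history):  # respond() yields the growing reply
        final = partial  # keep the latest partial text
    print(final)

Each yielded value is the accumulated reply so far with the <|SOH|> and <|Clean|> markers already stripped, which is what lets the ChatInterface render the text as it streams.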