elapt1c committed (verified)
Commit adccde7 · 1 Parent(s): b8b1c07

Update app.py

Files changed (1): app.py (+88, -147)
app.py CHANGED
@@ -1,155 +1,96 @@
-import os
-import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoConfig
-import torch.nn as nn
-
-# ----- Model Definition -----
-class CustomDialoGPT(nn.Module):
-    def __init__(self, vocab_size, n_embd=768, n_head=12, n_layer=12):
-        super().__init__()
-
-        config = AutoConfig.from_pretrained("microsoft/DialoGPT-medium",
-                                            vocab_size=vocab_size,
-                                            n_embd=n_embd,
-                                            n_head=n_head,
-                                            n_layer=n_layer,
-                                            bos_token_id=50256,
-                                            eos_token_id=50256,
-                                            pad_token_id=50256
-                                            )
-        self.transformer = AutoModelForCausalLM.from_config(config)
-        self.lm_head = nn.Linear(n_embd, vocab_size, bias=False)
-
-    def forward(self, input_ids):
-        transformer_outputs = self.transformer(input_ids=input_ids, output_hidden_states=True)
-        hidden_states = transformer_outputs.hidden_states[-1]  # get last hidden state
-        logits = self.lm_head(hidden_states)
-        return logits
-
-    def get_num_params(self):
-        return sum(p.numel() for p in self.parameters())
-
-def build_model(vocab_size, target_params=128_000_000):
-    """Build a model with around the target parameter count, ensuring n_embd is divisible by n_head."""
-
-    n_embd_options = [512, 768, 1024]
-    n_head_options = [8, 12, 16]
-    n_layer_options = [6, 8, 12, 16]
-
-    best_params_diff = float('inf')
-    best_n_embd = 0
-    best_n_head = 0
-    best_n_layer = 0
-
-    for n_embd in n_embd_options:
-        for n_head in n_head_options:
-            if n_embd % n_head != 0:
-                continue
-            for n_layer in n_layer_options:
-                model = CustomDialoGPT(vocab_size, n_embd, n_head, n_layer)
-                params = model.get_num_params()
-                params_diff = abs(params - target_params)
-
-                if params_diff < best_params_diff:
-                    best_params_diff = params_diff
-                    best_n_embd = n_embd
-                    best_n_head = n_head
-                    best_n_layer = n_layer
-
-                del model
-    print("Model parameters:", best_n_embd, best_n_head, best_n_layer)
-
-    return CustomDialoGPT(vocab_size, best_n_embd, best_n_head, best_n_layer)
-
-def chat_with_model(user_input, model, tokenizer, device="cpu", max_length=100, temperature=0.8):
-    """
-    Generates a response from the model given the user input.
-    """
-    input_ids = tokenizer.encode(user_input, return_tensors='pt').to(device)
 
     with torch.no_grad():
-        output = model.transformer.generate(
-            inputs=input_ids,
-            max_length=max_length,
-            temperature=temperature,
             pad_token_id=tokenizer.eos_token_id,
         )
-    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-    return generated_text
-
-def load_model_and_tokenizer(model_repo, device):
-    """Loads the model and tokenizer from the Hugging Face model repo."""
-    try:
-        # Check if running in hugging face
-        if 'HF_MODEL_ID' in os.environ:
-            # Load tokenizer
-            tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
-            vocab_size = len(tokenizer)
-
-            model = build_model(vocab_size)
-
-            # Load model and optimizer
-            checkpoint_files = [f for f in os.listdir(".") if f.endswith('.pth')]
-            if not checkpoint_files:
-                print("No checkpoint found. Please train the model first.")
-                return
-
-            checkpoint_path = checkpoint_files[0]
-
-            checkpoint = torch.load(checkpoint_path, map_location=device)
-            model.load_state_dict(checkpoint['model_state_dict'])
-
-            model.to(device)
-            model.eval()
-            print(f"Model loaded on device: {device}")
-        else:
-            # Load tokenizer
-            tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
-            vocab_size = len(tokenizer)
-
-            model = build_model(vocab_size)
-
-            # Load model and optimizer
-            checkpoint_path = input("Enter the path to your .pth checkpoint file: ")
-
-            checkpoint = torch.load(checkpoint_path, map_location=device)
-            model.load_state_dict(checkpoint['model_state_dict'])
-
-            model.to(device)
-            model.eval()
-            print(f"Model loaded on device: {device}")
-        return model, tokenizer
-    except Exception as e:
-        print(f"Error loading model or tokenizer: {e}")
-        return None, None
-
-
-def gradio_chat(model, tokenizer, device="cpu", max_length=100, temperature=0.8):
-    """Defines the gradio chatbot interaction."""
-    def respond(message, chat_history):
-        bot_message = chat_with_model(message, model, tokenizer, device=device, max_length=max_length, temperature=temperature)
-        chat_history.append((message, bot_message))
-        return "", chat_history
-
-    with gr.Blocks() as demo:
-        chatbot = gr.Chatbot()
-        msg = gr.Textbox()
-        clear = gr.Button("Clear")
-
-        msg.submit(respond, [msg, chatbot], [msg, chatbot])
-        clear.click(lambda: None, None, chatbot, queue=False)
-
-    return demo
 
 
 if __name__ == "__main__":
-    # Load model and tokenizer (downloading from hugging face model repo).
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"Using device: {device}")
-
-    model, tokenizer = load_model_and_tokenizer("elapt1c/ElapticAI-1a", device=device)
-    if model and tokenizer:
-        # launch the gradio interface
-        demo = gradio_chat(model, tokenizer, device=device)
-        demo.launch()
 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
+from huggingface_hub import hf_hub_download
+import os
+
+# Model and tokenizer details
+model_repo = "elapt1c/ElapticAI-1a"
+model_filename = "pytorch_model.bin"  # Assuming the model is saved as pytorch_model.bin; adjust if needed. Check the HF repo.
+tokenizer_name = "microsoft/DialoGPT-medium"
+
+# Device configuration
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+
+# Load model configuration
+config = AutoConfig.from_pretrained("microsoft/DialoGPT-medium")
+# Initialize model from config (important to use the same architecture)
+model = AutoModelForCausalLM.from_config(config)
+
+# Download and load model weights
+try:
+    pth_filepath = hf_hub_download(repo_id=model_repo, filename=model_filename)
+    checkpoint = torch.load(pth_filepath, map_location=device)
+
+    # Handle different checkpoint saving formats if needed.
+    # If the checkpoint is just the state_dict, load it directly.
+    if 'model_state_dict' in checkpoint:
+        model.load_state_dict(checkpoint['model_state_dict'])
+    elif 'state_dict' in checkpoint:
+        model.load_state_dict(checkpoint['state_dict'])
+    else:
+        # Assume the checkpoint is just the raw state_dict
+        model.load_state_dict(checkpoint)
+
+    print(f"Successfully loaded model weights from {model_repo}/{model_filename}")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    print("Please ensure the model repository and filename are correct.")
+    raise e  # Better to raise the error in a Space so that it is visible.
+
+model.to(device)
+model.eval()  # Set model to evaluation mode
+
+def chat_with_model(user_input, history=[]):
+    """Chatbot function to interact with the loaded model."""
+    history_transformer_format = history_to_transformer_format(history)
+    input_text = tokenizer.eos_token.join(history_transformer_format + [user_input])
+
+    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
 
     with torch.no_grad():
+        output = model.generate(
+            input_ids,
+            max_length=1000,  # Adjust as needed
             pad_token_id=tokenizer.eos_token_id,
+            temperature=0.7,
+            top_p=0.9
         )
+
+    # Decode only the tokens generated after the prompt so the reply does not
+    # repeat the conversation history that was fed in.
+    bot_response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()
+
+    # gr.ChatInterface tracks the conversation history itself, so the function
+    # only needs to return the reply text.
+    return bot_response
+
+def history_to_transformer_format(history):
+    """Convert gradio history (a list of (user, bot) pairs) to a flat list of strings."""
+    history_formatted = []
+    for user_msg, bot_msg in history:
+        history_formatted.append(user_msg)
+        history_formatted.append(bot_msg)
+    return history_formatted
+
+
+# gr.ChatInterface supplies its own textbox and chatbot components, so only the
+# chat function and presentation options are passed in.
+iface = gr.ChatInterface(
+    fn=chat_with_model,
+    title="ElapticAI-1a Chatbot",
+    description="Simple chatbot interface for the ElapticAI-1a model. Talk to the model and see its responses!",
+    examples=[
+        ["Hello"],
+        ["How are you?"],
+        ["Tell me a joke"]
+    ]
+)
 
 if __name__ == "__main__":
+    iface.launch()