Trickshotblaster committed on
Commit
91f17ff
·
1 Parent(s): ad590a4
Files changed (4)
  1. __pycache__/gpt.cpython-310.pyc +0 -0
  2. app.py +1 -1
  3. gpt.py +9 -2
  4. mike-code-600m.pth +3 -0
__pycache__/gpt.cpython-310.pyc CHANGED
Binary files a/__pycache__/gpt.cpython-310.pyc and b/__pycache__/gpt.cpython-310.pyc differ
 
app.py CHANGED
@@ -8,7 +8,7 @@ demo = gr.Interface(fn=gpt.get_response, inputs=["textbox",
8
  gr.Slider(0, 100, value=50, step=1),
9
  gr.Slider(0.1, 2.0, value=1.0),
10
  gr.Dropdown(
11
- ["mike-chat", "mike-code"], value="mike-chat"),
12
  ], outputs=gr.Markdown(line_breaks=True), title="Mike Chat", article="""Mike is the greatest AI ever created. It was trained for about 8 hrs on my pc using fineweb-edu and open orca datasets. While it hallucinates a lot, it seems to be about on par with other lms of its size (about 160M params). Model details:
13
  block_size: 512
14
  n_layers: 12
 
8
  gr.Slider(0, 100, value=50, step=1),
9
  gr.Slider(0.1, 2.0, value=1.0),
10
  gr.Dropdown(
11
+ ["mike-chat", "mike-code", "mike-code-600m"], value="mike-chat"),
12
  ], outputs=gr.Markdown(line_breaks=True), title="Mike Chat", article="""Mike is the greatest AI ever created. It was trained for about 8 hrs on my pc using fineweb-edu and open orca datasets. While it hallucinates a lot, it seems to be about on par with other lms of its size (about 160M params). Model details:
13
  block_size: 512
14
  n_layers: 12
gpt.py CHANGED
@@ -134,16 +134,23 @@ my_GPT = load_compiled_model_state_dict(my_GPT, 'latest_model_finetune.pth')
134
  #my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
135
  my_GPT.eval()
136
 
137
- my_GPT_code = GPT(enc.n_vocab, 256, n_layers, n_heads, d_model, dropout=0.1) #enc.n_vocab
138
  my_GPT_code = my_GPT_code.to(device)
139
  #my_GPT = torch.compile(my_GPT, mode='reduce-overhead')
140
  my_GPT_code = load_compiled_model_state_dict(my_GPT_code, 'mike-code-15k.pth')
141
  #my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
142
  my_GPT_code.eval()
143
 
 
 
 
 
 
 
144
  models = {
145
  "mike-chat": my_GPT,
146
- "mike-code": my_GPT_code
 
147
  }
148
 
149
  eot = enc._special_tokens['<|endoftext|>']
 
134
  #my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
135
  my_GPT.eval()
136
 
137
+ my_GPT_code = GPT(enc.n_vocab, 256, n_layers, n_heads, d_model, dropout=0.0) #enc.n_vocab
138
  my_GPT_code = my_GPT_code.to(device)
139
  #my_GPT = torch.compile(my_GPT, mode='reduce-overhead')
140
  my_GPT_code = load_compiled_model_state_dict(my_GPT_code, 'mike-code-15k.pth')
141
  #my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
142
  my_GPT_code.eval()
143
 
144
+ my_GPT_code_600 = GPT(enc.n_vocab, 256, 16, n_heads, 768 * 2, dropout=0.0) #enc.n_vocab
145
+ my_GPT_code_600 = my_GPT_code_600.to(device)
146
+ #my_GPT = torch.compile(my_GPT, mode='reduce-overhead')
147
+ my_GPT_code_600 = load_compiled_model_state_dict(my_GPT_code_600, 'mike-code-600m.pth')
148
+ #my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
149
+ my_GPT_code_600.eval()
150
  models = {
151
  "mike-chat": my_GPT,
152
+ "mike-code": my_GPT_code,
153
+ "mike-code-600m": my_GPT_code_600
154
  }
155
 
156
  eot = enc._special_tokens['<|endoftext|>']
mike-code-600m.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:721dc760156cba2bfcfe6556e6867b7c0f8c03fc3d307cbccd78d2934544e440
3
+ size 2432463763