Spaces:
Sleeping
Sleeping
Commit
·
91f17ff
1
Parent(s):
ad590a4
600M TIME
Browse files
- __pycache__/gpt.cpython-310.pyc +0 -0
- app.py +1 -1
- gpt.py +9 -2
- mike-code-600m.pth +3 -0
__pycache__/gpt.cpython-310.pyc
CHANGED
Binary files a/__pycache__/gpt.cpython-310.pyc and b/__pycache__/gpt.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -8,7 +8,7 @@ demo = gr.Interface(fn=gpt.get_response, inputs=["textbox",
|
|
8 |
gr.Slider(0, 100, value=50, step=1),
|
9 |
gr.Slider(0.1, 2.0, value=1.0),
|
10 |
gr.Dropdown(
|
11 |
-
["mike-chat", "mike-code"], value="mike-chat"),
|
12 |
], outputs=gr.Markdown(line_breaks=True), title="Mike Chat", article="""Mike is the greatest AI ever created. It was trained for about 8 hrs on my pc using fineweb-edu and open orca datasets. While it hallucinates a lot, it seems to be about on par with other lms of its size (about 160M params). Model details:
|
13 |
block_size: 512
|
14 |
n_layers: 12
|
|
|
8 |
gr.Slider(0, 100, value=50, step=1),
|
9 |
gr.Slider(0.1, 2.0, value=1.0),
|
10 |
gr.Dropdown(
|
11 |
+
["mike-chat", "mike-code", "mike-code-600m"], value="mike-chat"),
|
12 |
], outputs=gr.Markdown(line_breaks=True), title="Mike Chat", article="""Mike is the greatest AI ever created. It was trained for about 8 hrs on my pc using fineweb-edu and open orca datasets. While it hallucinates a lot, it seems to be about on par with other lms of its size (about 160M params). Model details:
|
13 |
block_size: 512
|
14 |
n_layers: 12
|
gpt.py
CHANGED
@@ -134,16 +134,23 @@ my_GPT = load_compiled_model_state_dict(my_GPT, 'latest_model_finetune.pth')
|
|
134 |
#my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
|
135 |
my_GPT.eval()
|
136 |
|
137 |
-
my_GPT_code = GPT(enc.n_vocab, 256, n_layers, n_heads, d_model, dropout=0.
|
138 |
my_GPT_code = my_GPT_code.to(device)
|
139 |
#my_GPT = torch.compile(my_GPT, mode='reduce-overhead')
|
140 |
my_GPT_code = load_compiled_model_state_dict(my_GPT_code, 'mike-code-15k.pth')
|
141 |
#my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
|
142 |
my_GPT_code.eval()
|
143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
models = {
|
145 |
"mike-chat": my_GPT,
|
146 |
-
"mike-code": my_GPT_code
|
|
|
147 |
}
|
148 |
|
149 |
eot = enc._special_tokens['<|endoftext|>']
|
|
|
134 |
#my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
|
135 |
my_GPT.eval()
|
136 |
|
137 |
+
my_GPT_code = GPT(enc.n_vocab, 256, n_layers, n_heads, d_model, dropout=0.0) #enc.n_vocab
|
138 |
my_GPT_code = my_GPT_code.to(device)
|
139 |
#my_GPT = torch.compile(my_GPT, mode='reduce-overhead')
|
140 |
my_GPT_code = load_compiled_model_state_dict(my_GPT_code, 'mike-code-15k.pth')
|
141 |
#my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
|
142 |
my_GPT_code.eval()
|
143 |
|
144 |
+
my_GPT_code_600 = GPT(enc.n_vocab, 256, 16, n_heads, 768 * 2, dropout=0.0) #enc.n_vocab
|
145 |
+
my_GPT_code_600 = my_GPT_code_600.to(device)
|
146 |
+
#my_GPT = torch.compile(my_GPT, mode='reduce-overhead')
|
147 |
+
my_GPT_code_600 = load_compiled_model_state_dict(my_GPT_code_600, 'mike-code-600m.pth')
|
148 |
+
#my_GPT.load_state_dict(torch.load('latest_model_finetune.pth', map_location=torch.device('cpu')))
|
149 |
+
my_GPT_code_600.eval()
|
150 |
models = {
|
151 |
"mike-chat": my_GPT,
|
152 |
+
"mike-code": my_GPT_code,
|
153 |
+
"mike-code-600m": my_GPT_code_600
|
154 |
}
|
155 |
|
156 |
eot = enc._special_tokens['<|endoftext|>']
|
mike-code-600m.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:721dc760156cba2bfcfe6556e6867b7c0f8c03fc3d307cbccd78d2934544e440
|
3 |
+
size 2432463763
|