Trickshotblaster committed on
Commit
766c9b2
·
1 Parent(s): 91f17ff

Minor update

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -9,12 +9,21 @@ demo = gr.Interface(fn=gpt.get_response, inputs=["textbox",
9
  gr.Slider(0.1, 2.0, value=1.0),
10
  gr.Dropdown(
11
  ["mike-chat", "mike-code", "mike-code-600m"], value="mike-chat"),
12
- ], outputs=gr.Markdown(line_breaks=True), title="Mike Chat", article="""Mike is the greatest AI ever created. It was trained for about 8 hrs on my pc using fineweb-edu and open orca datasets. While it hallucinates a lot, it seems to be about on par with other lms of its size (about 160M params). Model details:
 
 
 
13
  block_size: 512
14
  n_layers: 12
15
  n_heads: 12
16
  d_model: 768
17
- (Same as gpt-2 but without weight tying)""")
 
 
 
 
 
 
18
 
19
 
20
  if __name__ == "__main__":
 
9
  gr.Slider(0.1, 2.0, value=1.0),
10
  gr.Dropdown(
11
  ["mike-chat", "mike-code", "mike-code-600m"], value="mike-chat"),
12
+ ], outputs=gr.Markdown(line_breaks=True), title="Mike Chat", article="""
13
+ Notice: if you have a GPU, I would highly recommend cloning the space and running it locally. The CPU provided by spaces isn't very fast.
14
+
15
+ Mike is a small GPT-style language model. It was trained for about 8 hrs on my PC using fineweb-edu and open orca datasets. While it hallucinates a lot, it seems to be about on par with other LMs of its size (about 160M params). Model details:
16
  block_size: 512
17
  n_layers: 12
18
  n_heads: 12
19
  d_model: 768
20
+ (Same as gpt-2 but without weight tying)
21
+
22
+ Architecture for Mike-Code-600m:
23
+ block_size: 256
24
+ n_layers: 16
25
+ n_heads: 12
26
+ d_model: 1536""")
27
 
28
 
29
  if __name__ == "__main__":