File size: 1,949 Bytes
bf1ec05
248d4f4
bf1ec05
 
 
 
a4e819a
 
 
70d8b6d
91f17ff
766c9b2
 
 
 
a4e819a
 
 
 
766c9b2
 
 
 
 
 
 
bf1ec05
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import gradio as gr
import gpt

"""
For information on how to customize the Interface, see the Gradio docs: https://www.gradio.app/docs/gradio/interface
"""
# Markdown rendered beneath the interface (passed as ``article=`` below).
# It is written flush-left so the rendered text does not depend on how leading
# indentation inside the literal is treated, and the hyperparameters are
# Markdown bullet lists because Markdown folds consecutive plain lines into a
# single paragraph.
ARTICLE_MARKDOWN = """
Notice: if you have a GPU, I would highly recommend cloning the space and running it locally. The CPU provided by spaces isn't very fast.

Mike is a small GPT-style language model. It was trained for about 8 hrs on my PC using fineweb-edu and open orca datasets. While it hallucinates a lot, it seems to be about on par with other LMs of its size (about 160M params). Model details:

- block_size: 512
- n_layers: 12
- n_heads: 12
- d_model: 768

(Same as gpt-2 but without weight tying)

Architecture for Mike-Code-600m:

- block_size: 256
- n_layers: 16
- n_heads: 12
- d_model: 1536
"""

# Model names offered in the dropdown.
MODEL_CHOICES = ["mike-chat", "mike-code", "mike-code-600m"]

# Web UI wrapped around gpt.get_response. The four input components are handed
# to that function positionally, in the order listed: a text box, an integer
# slider (0-100, default 50), a float slider (0.1-2.0, default 1.0) and the
# model dropdown.
# NOTE(review): what each slider controls is defined by gpt.get_response's
# parameters, which are not visible in this file - confirm before adding
# explicit labels.
demo = gr.Interface(
    fn=gpt.get_response,
    inputs=[
        "textbox",
        gr.Slider(0, 100, value=50, step=1),
        gr.Slider(0.1, 2.0, value=1.0),
        gr.Dropdown(MODEL_CHOICES, value="mike-chat"),
    ],
    # line_breaks=True keeps single newlines in the model's reply visible.
    outputs=gr.Markdown(line_breaks=True),
    title="Mike Chat",
    article=ARTICLE_MARKDOWN,
)


def main():
    """Start the Gradio server for the module-level ``demo`` interface."""
    demo.launch()


# Only launch when executed as a script, not when imported.
if __name__ == "__main__":
    main()