Smiley0707 committed on
Commit
1674648
1 Parent(s): 460745e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -4
app.py CHANGED
@@ -8,6 +8,7 @@ from threading import Thread
8
 
9
  MODEL_LIST = ["meta-llama/Meta-Llama-3.1-8B-Instruct"]
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
11
 
12
  TITLE = "<h1><center>Meta-Llama3.1-8B</center></h1>"
13
 
@@ -38,9 +39,9 @@ quantization_config = BitsAndBytesConfig(
38
  bnb_4bit_use_double_quant=True,
39
  bnb_4bit_quant_type= "nf4")
40
 
41
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
42
  model = AutoModelForCausalLM.from_pretrained(
43
- "meta-llama/Meta-Llama-3.1-8B-Instruct",
44
  torch_dtype=torch.bfloat16,
45
  device_map="auto",
46
  quantization_config=quantization_config)
@@ -100,13 +101,68 @@ chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
100
 
101
  with gr.Blocks(css=CSS, theme="soft") as demo:
102
  gr.HTML(TITLE)
 
103
  gr.ChatInterface(
104
  fn=stream_chat,
105
  chatbot=chatbot,
106
  fill_height=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  cache_examples=False,
108
  )
109
 
110
 
111
- if __name__=='__main__':
112
- demo.launch()
 
8
 
9
  MODEL_LIST = ["meta-llama/Meta-Llama-3.1-8B-Instruct"]
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
+ MODEL = os.environ.get("MODEL_ID")
12
 
13
  TITLE = "<h1><center>Meta-Llama3.1-8B</center></h1>"
14
 
 
39
  bnb_4bit_use_double_quant=True,
40
  bnb_4bit_quant_type= "nf4")
41
 
42
+ tokenizer = AutoTokenizer.from_pretrained(MODEL)
43
  model = AutoModelForCausalLM.from_pretrained(
44
+ MODEL,
45
  torch_dtype=torch.bfloat16,
46
  device_map="auto",
47
  quantization_config=quantization_config)
 
101
 
102
  with gr.Blocks(css=CSS, theme="soft") as demo:
103
  gr.HTML(TITLE)
104
+ gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
105
  gr.ChatInterface(
106
  fn=stream_chat,
107
  chatbot=chatbot,
108
  fill_height=True,
109
+ additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
110
+ additional_inputs=[
111
+ gr.Textbox(
112
+ value="You are a helpful assistant",
113
+ label="System Prompt",
114
+ render=False,
115
+ ),
116
+ gr.Slider(
117
+ minimum=0,
118
+ maximum=1,
119
+ step=0.1,
120
+ value=0.8,
121
+ label="Temperature",
122
+ render=False,
123
+ ),
124
+ gr.Slider(
125
+ minimum=128,
126
+ maximum=8192,
127
+ step=1,
128
+ value=1024,
129
+ label="Max new tokens",
130
+ render=False,
131
+ ),
132
+ gr.Slider(
133
+ minimum=0.0,
134
+ maximum=1.0,
135
+ step=0.1,
136
+ value=1.0,
137
+ label="top_p",
138
+ render=False,
139
+ ),
140
+ gr.Slider(
141
+ minimum=1,
142
+ maximum=20,
143
+ step=1,
144
+ value=20,
145
+ label="top_k",
146
+ render=False,
147
+ ),
148
+ gr.Slider(
149
+ minimum=0.0,
150
+ maximum=2.0,
151
+ step=0.1,
152
+ value=1.2,
153
+ label="Repetition penalty",
154
+ render=False,
155
+ ),
156
+ ],
157
+ examples=[
158
+ ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
159
+ ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
160
+ ["Tell me a random fun fact about the Roman Empire."],
161
+ ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
162
+ ],
163
  cache_examples=False,
164
  )
165
 
166
 
167
+ if __name__ == "__main__":
168
+ demo.launch()