umair894 committed on
Commit
dc48636
·
verified ·
1 Parent(s): 5ce6989

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -38,11 +38,11 @@ h1 {
38
  """
39
 
40
  # Load the tokenizer and model
41
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
42
- model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="cuda:0") # to("auto")
43
  terminators = [
44
  tokenizer.eos_token_id,
45
- tokenizer.convert_tokens_to_ids("<|eot_id|>")
46
  ]
47
 
48
  #@spaces.GPU(duration=120)
@@ -75,7 +75,7 @@ def chat_llama3_8b(message: str,
75
  streamer=streamer,
76
  max_new_tokens=max_new_tokens,
77
  do_sample=True,
78
- #temperature=temperature,
79
  eos_token_id=terminators,
80
  )
81
  # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
@@ -93,7 +93,7 @@ def chat_llama3_8b(message: str,
93
 
94
 
95
  # Gradio block
96
- chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
97
 
98
  with gr.Blocks(fill_height=True, css=css) as demo:
99
 
@@ -105,12 +105,12 @@ with gr.Blocks(fill_height=True, css=css) as demo:
105
  fill_height=True,
106
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
107
  additional_inputs=[
108
- # gr.Slider(minimum=0,
109
- # maximum=1,
110
- # step=0.1,
111
- # value=0.95,
112
- # label="Temperature",
113
- # render=False),
114
  gr.Slider(minimum=128,
115
  maximum=4096,
116
  step=1,
 
38
  """
39
 
40
  # Load the tokenizer and model
41
+ tokenizer = AutoTokenizer.from_pretrained("umair894/llama3")
42
+ model = AutoModelForCausalLM.from_pretrained("umair894/llama3", device_map="cuda:0") # to("auto")
43
  terminators = [
44
  tokenizer.eos_token_id,
45
+ tokenizer.convert_tokens_to_ids("<|eot_id|>") #eos_token
46
  ]
47
 
48
  #@spaces.GPU(duration=120)
 
75
  streamer=streamer,
76
  max_new_tokens=max_new_tokens,
77
  do_sample=True,
78
+ temperature=temperature,
79
  eos_token_id=terminators,
80
  )
81
  # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
 
93
 
94
 
95
  # Gradio block
96
+ chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='ChatInterface')
97
 
98
  with gr.Blocks(fill_height=True, css=css) as demo:
99
 
 
105
  fill_height=True,
106
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
107
  additional_inputs=[
108
+ gr.Slider(minimum=0,
109
+ maximum=1,
110
+ step=0.1,
111
+ value=0.95,
112
+ label="Temperature",
113
+ render=False),
114
  gr.Slider(minimum=128,
115
  maximum=4096,
116
  step=1,