neuralworm committed
Commit cefcee1 · verified · 1 Parent(s): 3fc4a36

Update app.py

Files changed (1):
  1. app.py +59 -24
app.py CHANGED
@@ -3,14 +3,11 @@ from huggingface_hub import InferenceClient
 import os
 
 # Ensure the required library is installed
-os.system("pip install minijinja")
+os.system("pip install minijinja gradio huggingface_hub")
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
+# Initialize the client with the desired model
 client = InferenceClient("meta-llama/Meta-Llama-3.1-8B")
 
-
 def respond(
     message,
     history: list[tuple[str, str]],
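For reference, `InferenceClient.chat_completion` expects OpenAI-style message dicts rather than bare strings, which is why the `autocomplete` hunk below builds its `messages` list that way. A minimal non-streaming sketch against the same model (the prompt text is illustrative):

    from huggingface_hub import InferenceClient

    client = InferenceClient("meta-llama/Meta-Llama-3.1-8B")

    # Each message is a dict with "role" and "content" keys.
    result = client.chat_completion(
        [{"role": "user", "content": "Write a haiku about autumn."}],
        max_tokens=64,
    )
    print(result.choices[0].message.content)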
@@ -46,25 +43,63 @@ def respond(
     except Exception as e:
         yield f"Error: {str(e)}"
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
+def autocomplete(prompt, max_tokens, temperature, top_p):
+    # chat_completion expects a list of role/content message dicts
+    messages = [{"role": "user", "content": prompt}]
+    response = ""
+
+    try:
+        for message in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token = message.choices[0].delta.content
+            if token:  # the final stream chunk may carry no content
+                response += token
+                yield response
+    except Exception as e:
+        yield f"Error: {str(e)}"
+
+# Create the Gradio interface
+demo = gr.Blocks()
+
+with demo:
+    gr.Markdown("# Chat with Meta-Llama")
+
+    with gr.Tab("Chat Interface"):
+        chatbot = gr.ChatInterface(
+            respond,
+            additional_inputs=[
+                gr.Textbox(value="", label="System message"),
+                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+                gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.95,
+                    step=0.05,
+                    label="Top-p (nucleus sampling)",
+                ),
+            ],
+        )
+
+    with gr.Tab("Notebook Interface"):
+        gr.Markdown("## Notebook Interface with Autocomplete")
+        prompt = gr.Textbox(label="Enter your text")
+        output = gr.Textbox(label="Autocompleted Text", interactive=False)
+        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
+        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+        autocomplete_button = gr.Button("Autocomplete")
+
+        autocomplete_button.click(
+            autocomplete,
+            inputs=[prompt, max_tokens, temperature, top_p],
+            outputs=output
+        )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
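Because `autocomplete` is a generator, Gradio streams each partial `response` into the output Textbox as tokens arrive once the button's `.click` event fires. The function can also be smoke-tested without the UI; a minimal sketch (prompt text illustrative):

    # Consume the stream directly; prints progressively longer prefixes.
    for partial in autocomplete("Once upon a time", max_tokens=32, temperature=0.7, top_p=0.95):
        print(partial)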