neuralworm committed
Commit cefcee1 · verified · 1 Parent(s): 3fc4a36

Update app.py

Files changed (1):
  1. app.py +59 -24
app.py CHANGED
@@ -3,14 +3,11 @@ from huggingface_hub import InferenceClient
 import os
 
 # Ensure the required library is installed
-os.system("pip install minijinja")
+os.system("pip install minijinja gradio huggingface_hub")
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
+# Initialize the client with the desired model
 client = InferenceClient("meta-llama/Meta-Llama-3.1-8B")
 
-
 def respond(
     message,
     history: list[tuple[str, str]],
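For reference, `InferenceClient.chat_completion` expects OpenAI-style message dicts rather than bare strings, which is why the `autocomplete` hunk below builds its `messages` list that way. A minimal non-streaming sketch against the same model (the prompt text is illustrative):

    from huggingface_hub import InferenceClient

    client = InferenceClient("meta-llama/Meta-Llama-3.1-8B")

    # Each message is a dict with "role" and "content" keys.
    result = client.chat_completion(
        [{"role": "user", "content": "Write a haiku about autumn."}],
        max_tokens=64,
    )
    print(result.choices[0].message.content)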
@@ -46,25 +43,63 @@ def respond(
     except Exception as e:
         yield f"Error: {str(e)}"
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
+def autocomplete(prompt, max_tokens, temperature, top_p):
+    # chat_completion expects a list of role/content message dicts
+    messages = [{"role": "user", "content": prompt}]
+    response = ""
+
+    try:
+        for message in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token = message.choices[0].delta.content
+            if token:  # the final stream chunk may carry no content
+                response += token
+                yield response
+    except Exception as e:
+        yield f"Error: {str(e)}"
+
+# Create the Gradio interface
+demo = gr.Blocks()
+
+with demo:
+    gr.Markdown("# Chat with Meta-Llama")
+
+    with gr.Tab("Chat Interface"):
+        chatbot = gr.ChatInterface(
+            respond,
+            additional_inputs=[
+                gr.Textbox(value="", label="System message"),
+                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+                gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.95,
+                    step=0.05,
+                    label="Top-p (nucleus sampling)",
+                ),
+            ],
+        )
+
+    with gr.Tab("Notebook Interface"):
+        gr.Markdown("## Notebook Interface with Autocomplete")
+        prompt = gr.Textbox(label="Enter your text")
+        output = gr.Textbox(label="Autocompleted Text", interactive=False)
+        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
+        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+        autocomplete_button = gr.Button("Autocomplete")
+
+        autocomplete_button.click(
+            autocomplete,
+            inputs=[prompt, max_tokens, temperature, top_p],
+            outputs=output
+        )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
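Because `autocomplete` is a generator, Gradio streams each partial `response` into the output Textbox as tokens arrive once the button's `.click` event fires. The function can also be smoke-tested without the UI; a minimal sketch (prompt text illustrative):

    # Consume the stream directly; prints progressively longer prefixes.
    for partial in autocomplete("Once upon a time", max_tokens=32, temperature=0.7, top_p=0.95):
        print(partial)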