terminaldz committed on
Commit
96641bf
·
verified ·
1 Parent(s): a1b22fc
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -1,10 +1,14 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient(model="meta-llama/Meta-Llama-3.1-405B-Instruct-FP8")
 
 
 
8
 
9
  def respond(
10
  message,
@@ -25,22 +29,20 @@ def respond(
25
  messages.append({"role": "user", "content": message})
26
 
27
  response = ""
28
-
29
- for message in client.chat_completion(
30
- messages,
31
  max_tokens=max_tokens,
32
  stream=True,
33
  temperature=temperature,
34
  top_p=top_p,
 
35
  ):
36
  token = message.choices[0].delta.content
37
 
38
  response += token
39
  yield response
40
 
41
- """
42
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
43
- """
44
  demo = gr.ChatInterface(
45
  respond,
46
  additional_inputs=[
@@ -57,5 +59,6 @@ demo = gr.ChatInterface(
57
  ],
58
  )
59
 
 
60
  if __name__ == "__main__":
61
- demo.launch()
 
1
  import gradio as gr
2
+ from openai import OpenAI
3
+ import os
4
 
5
+
6
+ TOKEN = os.getenv("HF_TOKEN")
7
+
8
+ client = OpenAI(
9
+ base_url="https://api-inference.huggingface.co/v1/",
10
+ api_key=TOKEN,
11
+ )
12
 
13
  def respond(
14
  message,
 
29
  messages.append({"role": "user", "content": message})
30
 
31
  response = ""
32
+
33
+ for message in client.chat.completions.create(
34
+ model="meta-llama/Meta-Llama-3.1-405B-Instruct-FP8",
35
  max_tokens=max_tokens,
36
  stream=True,
37
  temperature=temperature,
38
  top_p=top_p,
39
+ messages=messages,
40
  ):
41
  token = message.choices[0].delta.content
42
 
43
  response += token
44
  yield response
45
 
 
 
 
46
  demo = gr.ChatInterface(
47
  respond,
48
  additional_inputs=[
 
59
  ],
60
  )
61
 
62
+
63
  if __name__ == "__main__":
64
+ demo.launch()