GaborToth2 committed
Commit 149acbb · Parent(s): ca6b728

cohere and HF integration

Files changed (1)
  1. app.py  +38 -21
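Both API keys are read from environment variables at import time, so the Space needs HF_API_KEY and COHERE_API_KEY set as repository secrets (or exported locally). A minimal startup guard, not part of this commit, that fails fast when a key is missing:

import os

# Hypothetical guard (not in the commit): app.py builds both clients at
# import time, so a missing key would otherwise surface later as an opaque
# authentication error instead of a clear message here.
for key in ("HF_API_KEY", "COHERE_API_KEY"):
    if not os.getenv(key):
        raise RuntimeError(f"{key} is not set; add it as a Space secret or export it locally.")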
app.py CHANGED

@@ -1,11 +1,17 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
 import os
+import cohere
+from huggingface_hub import InferenceClient
+
+# Retrieve API keys from environment variables
+HF_API_KEY = os.getenv("HF_API_KEY")
+COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 
-HF_API_KEY = os.getenv("HF_API_KEY") # Retrieve API key from environment variable
+# Initialize clients
+hf_model = "meta-llama/Llama-3.2-3B-Instruct" # Change to preferred HF model
+hf_client = InferenceClient(model=hf_model, token=HF_API_KEY)
 
-models = ["HuggingFaceH4/zephyr-7b-beta", "microsoft/Phi-4-mini-instruct", "meta-llama/Llama-3.2-3B-Instruct"]
-client = InferenceClient(model=models[2], token=HF_API_KEY) # Pass API key to client
+cohere_client = cohere.Client(COHERE_API_KEY)
 
 
 def respond(
@@ -15,9 +21,10 @@ def respond(
     max_tokens,
     temperature,
     top_p,
+    use_cohere, # Checkbox input
 ):
     messages = [{"role": "system", "content": system_message}]
-
+
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
@@ -26,24 +33,34 @@ def respond(
 
     messages.append({"role": "user", "content": message})
 
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+    # 🔹 Switch API based on checkbox
+    if use_cohere:
+        response = cohere_client.chat(
+            model="command-r-plus",
+            message=message,
+            chat_history=[{"user_name": "User" if i == 0 else "Assistant", "text": text}
+                          for h in history for i, text in enumerate(h) if text],
+            temperature=temperature,
+            max_tokens=max_tokens,
+            p=top_p,
+        )
+        yield response.text # Cohere returns the full response in one piece
 
-        response += token
-        yield response
+    else:
+        response = ""
+        for chunk in hf_client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token = chunk.choices[0].delta.content
+            response += token or "" # guard against empty stream deltas
+            yield response # Hugging Face supports streaming
 
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
+# 🔥 Gradio UI with Checkbox
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -57,9 +74,9 @@ demo = gr.ChatInterface(
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
+        gr.Checkbox(label="Use Cohere API", value=False), # Checkbox to toggle API
     ],
 )
 
-
 if __name__ == "__main__":
     demo.launch()
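Outside the Gradio UI, the branch switch is easy to exercise by calling respond directly. A minimal sketch, assuming the parameter lines hidden by the hunk keep the stock ChatInterface template order (message, history, system_message) and that both keys are already exported; the prompt and history below are made up for illustration:

from app import respond  # the file committed here

history = [("Hi there", "Hello! How can I help?")]  # (user, assistant) pairs
args = dict(
    message="Summarize our chat so far.",  # illustrative prompt
    history=history,
    system_message="You are a friendly chatbot.",
    max_tokens=128,
    temperature=0.7,
    top_p=0.95,
)

# Hugging Face branch: each iteration yields the accumulated response
# while tokens stream in.
for partial in respond(use_cohere=False, **args):
    print(partial, end="\r")
print()

# Cohere branch: the generator yields one complete string.
print(next(respond(use_cohere=True, **args)))

Because both branches yield rather than return, gr.ChatInterface consumes the function the same way whether or not the "Use Cohere API" checkbox is ticked.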