GaborToth2 committed
Commit 45bb735 · 1 Parent(s): 7ccaecf

removing cohere

Files changed (2)
  1. app.py +10 -26
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 import os
-import cohere
 import faiss
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer
@@ -23,10 +22,8 @@ index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
 index.add(document_embeddings_np)


-client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
+client = InferenceClient("meta-llama/Llama-3.2-B-Instruct")
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
-client_cohere = cohere.Client(COHERE_API_KEY)
-COHERE_MODEL = "command-r-plus"

 def respond(
     message,
@@ -35,7 +32,6 @@ def respond(
     max_tokens,
     temperature,
     top_p,
-    use_cohere_api,
 ):

     query_embedding = embedding_model.encode([message], convert_to_tensor=True)
@@ -56,27 +52,16 @@ def respond(

     response = ""

-
-    if use_cohere_api:
-        cohere_response = client_cohere.chat(
-            message=message,
-            model=COHERE_MODEL,
-            temperature=temperature,
-            max_tokens=max_tokens
-        )
-        response = cohere_response.text
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
+        response += token
         yield response
-    else:
-        for message in client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            token = message.choices[0].delta.content
-            response += token
-            yield response

 demo = gr.ChatInterface(
     respond,
@@ -91,7 +76,6 @@ demo = gr.ChatInterface(
         step=0.05,
         label="Top-p (nucleus sampling)",
     ),
-    gr.Checkbox(label="Use Cohere API."),
 ],
 )
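For reference, here is the streaming path that remains after this commit, pulled out of diff context as a minimal, self-contained sketch. It assumes the published model ID meta-llama/Llama-3.2-3B-Instruct (the added line reads "Llama-3.2-B-Instruct", which drops the size suffix) and a caller-built messages list; app.py assembles its own messages from the FAISS-retrieved context.

from huggingface_hub import InferenceClient

# Assumed model ID; the committed line reads "meta-llama/Llama-3.2-B-Instruct".
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")

def stream_reply(messages, max_tokens=512, temperature=0.7, top_p=0.95):
    """Yield the accumulated reply after each streamed chunk, as respond() does."""
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final chunk's delta can be empty
            response += token
            yield response

# Hypothetical usage:
for partial in stream_reply([{"role": "user", "content": "Hello!"}]):
    print(partial)

Naming the loop variable chunk, rather than reusing message as the committed code does, also avoids shadowing the message argument that respond() still needs for the embedding query.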
requirements.txt CHANGED
@@ -1,4 +1,3 @@
 huggingface_hub==0.25.2
-cohere
 faiss
 sentence_transformers
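The unchanged context lines in the app.py hunks outline the retrieval side of the app. Below is a self-contained sketch of that setup, with an assumed corpus and embedding model (neither appears in the diff).

import faiss
from sentence_transformers import SentenceTransformer

# Hypothetical corpus; app.py builds document_embeddings_np from its own documents.
documents = [
    "Paris is the capital of France.",
    "FAISS performs exact and approximate nearest-neighbor search.",
]
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model

# Embed the corpus and index it with exact L2 search, matching the context lines.
document_embeddings_np = embedding_model.encode(documents)  # float32 array (n, d)
index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
index.add(document_embeddings_np)

# At query time, embed the incoming message and fetch the nearest document.
query_embedding = embedding_model.encode(["What is the capital of France?"])
distances, ids = index.search(query_embedding, 1)
print(documents[ids[0][0]])

app.py encodes with convert_to_tensor=True and then converts to NumPy (hence document_embeddings_np); the plain encode() call above returns the float32 array FAISS expects directly.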