Commit 45bb735 · 1 parent: 7ccaecf
removing cohere

Files changed:
- app.py (+10 -26)
- requirements.txt (+0 -1)
app.py
CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 import os
-import cohere
 import faiss
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer
@@ -23,10 +22,8 @@ index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
 index.add(document_embeddings_np)
 
 
-client = InferenceClient("meta-llama/Llama-3.2-
+client = InferenceClient("meta-llama/Llama-3.2-B-Instruct")
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
-client_cohere = cohere.Client(COHERE_API_KEY)
-COHERE_MODEL = "command-r-plus"
 
 def respond(
     message,
@@ -35,7 +32,6 @@ def respond(
     max_tokens,
     temperature,
     top_p,
-    use_cohere_api,
 ):
 
     query_embedding = embedding_model.encode([message], convert_to_tensor=True)
@@ -56,27 +52,16 @@ def respond(
 
     response = ""
 
-
-
-
-
-
-
-
-
-        response
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
+        response += token
         yield response
-    else:
-        for message in client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            token = message.choices[0].delta.content
-            response += token
-            yield response
 
 demo = gr.ChatInterface(
     respond,
@@ -91,7 +76,6 @@ demo = gr.ChatInterface(
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
-        gr.Checkbox(label="Use Cohere API."),
     ],
 )
 
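After this change, respond() streams every reply through huggingface_hub's InferenceClient instead of branching on a Cohere flag. A minimal standalone sketch of the new loop follows; the model id is copied from the diff and looks like a truncated Llama-3.2 repo name (likely 1B or 3B), so verify it on the Hub, and the None-guard is an addition of this sketch, since the final streamed chunk's delta.content can be None and the diff's bare `response += token` would raise a TypeError on it:

    from huggingface_hub import InferenceClient

    # Model id as committed; "meta-llama/Llama-3.2-B-Instruct" appears
    # truncated, so confirm the exact repo name before running.
    client = InferenceClient("meta-llama/Llama-3.2-B-Instruct")

    def stream_reply(messages, max_tokens=512, temperature=0.7, top_p=0.95):
        # Mirrors the loop added in app.py: yield the growing reply string
        # so gr.ChatInterface re-renders the partial message on each chunk.
        response = ""
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:  # guard: the last streamed chunk can carry content=None
                response += token
            yield response

Each yield hands the accumulated partial string back to the Gradio chat UI, which is why the diff builds up response rather than yielding individual tokens.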
requirements.txt
CHANGED
@@ -1,4 +1,3 @@
 huggingface_hub==0.25.2
-cohere
 faiss
 sentence_transformers
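One packaging note on the remaining pins: FAISS is not normally published on PyPI under the bare name faiss; the maintained wheels are faiss-cpu and faiss-gpu. If the Space build fails to resolve faiss, a variant like this (package name assumed, versions left unpinned as in the original) should install:

    huggingface_hub==0.25.2
    faiss-cpu
    sentence_transformers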