# NOTE(review): removed scraped Hugging Face page chrome that preceded the
# module docstring (Space status lines, file size, git-blame hashes, and a
# line-number gutter) — it was web-UI residue, not valid Python.
"""
cf https://huggingface.co/spaces/Nymbo/Qwen-2.5-72B-Instruct/blob/main/app.py
https://huggingface.co/spaces/prithivMLmods/Llama-3.1-8B-Instruct/blob/main/app.py
https://github.com/huggingface/huggingface-llama-recipes/blob/main/api_inference/inference-api.ipynb
"""
import gradio as gr
from huggingface_hub import InferenceClient
import os

# BUG FIX: the original instantiated `OpenAI(...)` while the matching
# `from openai import OpenAI` was commented out, raising NameError at import.
# `InferenceClient` (already imported) is a documented drop-in replacement:
# it exposes the same OpenAI-compatible `client.chat.completions.create`
# interface, so downstream call sites are unchanged.
client = InferenceClient(
    base_url="https://api-inference.huggingface.co/v1/",
    # "na" placeholder lets the app start without HF_TOKEN set; requests
    # then fail with an auth error instead of crashing at startup.
    api_key=os.getenv("HF_TOKEN", "na"),
)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: Latest user message.
        history: Prior ``(user, assistant)`` turn pairs; falsy entries
            (empty strings) are skipped.
        system_message: System prompt prepended to the conversation.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The accumulated assistant response after each streamed chunk, or the
        exception text if the API call fails.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        stream = client.chat.completions.create(
            model="Qwen/Qwen2.5-72B-Instruct",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        # Renamed from `message`, which shadowed the user-input parameter.
        for chunk in stream:
            # delta.content may be None (e.g. the final stream chunk);
            # coalesce to "" so concatenation cannot raise TypeError.
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response
    except Exception as e:  # surface API errors in the chat UI, not a crash
        yield str(e)
# Shared chat display widget; fixed 600 px height keeps long replies scrollable.
chatbot = gr.Chatbot(height=600)
# Page-level CSS: cap the app width, center the title, hide the Gradio footer.
css = '''
.gradio-container{max-width: 1000px !important}
h1{text-align:center}
footer {
visibility: hidden
}
'''
# Build the extra controls first so the ChatInterface call stays readable.
system_prompt_box = gr.Textbox(value="", label="System message")
# Ceiling chosen as half the model context minus headroom for the prompt.
max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=32768 // 2 - 500,
    value=32768 // 2 - 500,
    step=1,
    label="Max new tokens",
)
temperature_slider = gr.Slider(
    minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature"
)
top_p_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"
)

# NOTE: retry_btn/undo_btn/clear_btn/stop_btn kwargs were removed from
# gr.ChatInterface in Gradio 5 and are intentionally not passed here.
demo = gr.ChatInterface(
    respond,
    type='messages',
    additional_inputs=[
        system_prompt_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
    ],
    fill_height=True,
    chatbot=chatbot,
    css=css,
)
if __name__ == "__main__":
    # BUG FIX: `launch()` has no `ssr` keyword — the parameter is `ssr_mode`
    # (Gradio 5+), so the original `ssr=False` raised
    # "TypeError: launch() got an unexpected keyword argument 'ssr'".
    # SSR is disabled to skip the Node-based server-side rendering path.
    demo.launch(ssr_mode=False)