import gradio as gr
from huggingface_hub import InferenceClient
import spaces

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

client = InferenceClient("mkurman/llama-3.2-MEDIT-3B-o1")
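
# For reference, a minimal non-streaming sketch of the same chat API (a
# hypothetical helper, not used by the app): calling `chat_completion` without
# `stream=True` returns the whole completion in a single object.
def _example_single_turn(prompt: str) -> str:
    # One user message in, the complete assistant reply out.
    result = client.chat_completion(
        [{"role": "user", "content": prompt}],
        max_tokens=256,
    )
    return result.choices[0].message.content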
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    # system_message,
    max_tokens,
    temperature,
    top_p,
):
    # messages = [{"role": "system", "content": system_message}]
    messages = []

    # Rebuild the conversation so far as role/content messages.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""
    counter = 0

    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        logprobs=True,
        stop=['</Output>'],
    ):
        text = chunk.choices[0].delta.content or ""
        token = chunk.choices[0].logprobs.content[0].token

        # The model marks its reasoning with <Thought> and its final answer
        # with <Output>...</Output>; turn those tags into Markdown headers.
        if token in ['<Thought>', '<|python_tag|>']:
            text = '## Thinking:\n\n' + text
        if token == '<Output>':
            if counter > 0:
                text = '## Output:\n\n' + text
            else:
                # An <Output> emitted as the very first token still opens the
                # thinking section.
                text = '## Thinking:\n\n' + text
        if token == '</Output>':
            yield response
            break

        response += text
        counter += 1
        yield response
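
# A quick sketch of consuming `respond` outside Gradio (a hypothetical helper,
# not used by the app): each yielded string is the full response so far, so a
# caller only needs to keep the last value.
def _example_drain(user_message: str) -> str:
    final = ""
    for partial in respond(user_message, [], max_tokens=512, temperature=0.1, top_p=0.95):
        final = partial
    return final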
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    title="Llama 3.2 MedIT 3B o1",
    description=(
        "Built with Llama. Please note that this model is not a source of "
        "knowledge and is not intended to provide 100% accurate answers. If the "
        "model provides answers that are generally considered inappropriate, "
        "please contact me."
    ),
    additional_inputs=[
        # gr.Textbox(value="You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability. You always think and reflect before providing final answers in a step-by-step manner.", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.0, maximum=2.0, value=0.1, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()