File size: 2,712 Bytes
fc46f2c 372a5eb 562ba5d 372a5eb 562ba5d fc46f2c 63bab05 fc46f2c 372a5eb b388fe7 fc46f2c 3f93878 fc46f2c 3f93878 fc46f2c 3f93878 fc46f2c 3f93878 fc46f2c 3f93878 fc46f2c 3f93878 fc46f2c 372a5eb fc46f2c 3f93878 fc46f2c 3f93878 fc46f2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download, login
# import os
# login(os.getenv("HF_TOKEN"))  # disabled: it's public now, so no token is needed
# Which checkpoint to serve. Both values can be overridden through the
# REPO_ID / MODEL_FILE environment variables; the defaults select a
# Q4_0-quantised GGUF build of HuatuoGPT-o1-7B.
_repo_id = os.environ.get("REPO_ID", "bartowski/HuatuoGPT-o1-7B-v0.1-GGUF")
_model_file = os.environ.get("MODEL_FILE", "HuatuoGPT-o1-7B-v0.1-Q4_0.gguf")

# The value returned by hf_hub_download is handed to Llama as the model path.
_model_path = hf_hub_download(repo_id=_repo_id, filename=_model_file)

# NOTE(review): no n_ctx is passed, so the library's default context size
# applies -- confirm it is large enough for the "Max Tokens" slider range.
model = Llama(model_path=_model_path)
# Markdown rendered at the top of the page.
DESCRIPTION = """
# FreedomIntelligence/HuatuoGPT-o1-7B | Duplicate the space and set it to private for faster & personal inference for free.
HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning.
It generates a complex thought process, reflecting and refining its reasoning, before providing a final response.
**To start a new chat**, click "clear" and start a new dialog.
"""

# Markdown rendered at the bottom of the page.
LICENSE = """
--- Apache 2.0 License ---
"""
def user(message, history):
    """Append the submitted message to the chat history as a user turn.

    Args:
        message: Text taken from the input textbox.
        history: Current chat history as a list of ``{"role", "content"}``
            dicts. ``None`` (or any empty value) is treated as an empty
            history instead of raising.

    Returns:
        A ``("", new_history)`` tuple. The empty string is written back to
        the textbox, and ``new_history`` is a new list (the input list is not
        mutated) that ends with the user's message.
    """
    previous_turns = list(history) if history else []
    return "", previous_turns + [{"role": "user", "content": message}]
def generate_text(history, max_tokens=512, temperature=0.9, top_p=0.95):
    """Stream the model's reply to the latest message in ``history``.

    Args:
        history: Chat history as a list of dicts with ``"role"`` and
            ``"content"`` keys. The last entry is the message to answer.
        max_tokens: Passed to the model as ``max_tokens``.
        temperature: Passed to the model as ``temperature``.
        top_p: Passed to the model as ``top_p``.

    Yields:
        The ``history`` list after every streamed chunk. An assistant entry
        is appended to it once and its ``"content"`` grows in place as chunks
        arrive. For an empty or ``None`` history, a single empty list is
        yielded and the model is not called.
    """
    # Nothing to answer: return the empty history instead of failing on
    # history[-1] below.
    if not history:
        yield history if history is not None else []
        return

    # Earlier turns are reduced to role/content only; the newest entry is
    # always sent as the user turn.
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in history[:-1]
    ]
    prompt = history[-1]["content"]

    response = model.create_chat_completion(
        messages=prior_turns + [{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stream=True,
    )

    history.append({"role": "assistant", "content": ""})
    for streamed in response:
        delta = streamed["choices"][0].get("delta", {})
        # A chunk without "content", or with content set to None, adds
        # nothing to the reply rather than breaking the concatenation.
        history[-1]["content"] += delta.get("content") or ""
        yield history
# Page layout and event wiring for the chat demo.
with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)

    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # Sampling controls, collapsed by default.
    with gr.Accordion("Adjust Parameters", open=False):
        max_tokens = gr.Slider(
            label="Max Tokens",
            minimum=512,
            maximum=4096,
            step=1,
            value=1024,
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=1.5,
            step=0.1,
            value=0.9,
        )
        top_p = gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        )

    # Step 1: record the user's turn and write "" back to the textbox.
    submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    )
    # Step 2: once the turn is recorded, stream the reply into the chatbot.
    submit_event.then(
        fn=generate_text,
        inputs=[chatbot, max_tokens, temperature, top_p],
        outputs=chatbot,
    )

    # Sends None to the chatbot component when "Clear" is clicked.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

    gr.Examples(
        label="Examples",
        inputs=msg,
        examples=[
            ["How many r's are in the word strawberry?"],
            ["How to stop a cough?"],
            ["How do I relieve feet pain?"],
        ],
    )

    gr.Markdown(LICENSE)
if __name__ == "__main__":
    # Start the Gradio app only when this file is run as a script.
    demo.launch()