File size: 2,712 Bytes
fc46f2c
 
 
372a5eb
562ba5d
372a5eb
562ba5d
fc46f2c
 
 
63bab05
 
fc46f2c
 
 
 
372a5eb
b388fe7
 
fc46f2c
 
 
 
 
 
 
 
3f93878
 
 
 
fc46f2c
3f93878
 
 
fc46f2c
3f93878
fc46f2c
 
 
 
 
3f93878
 
fc46f2c
 
 
3f93878
 
fc46f2c
 
 
 
3f93878
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc46f2c
 
372a5eb
 
fc46f2c
3f93878
 
fc46f2c
 
3f93878
fc46f2c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download, login
#import os

# login(os.getenv("HF_TOKEN"))  # no longer needed: the model repository is now public

# Download the GGUF weights from the Hugging Face Hub and load them with
# llama.cpp. The repository and file name can be overridden through the
# REPO_ID and MODEL_FILE environment variables.
model = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "bartowski/HuatuoGPT-o1-7B-v0.1-GGUF"),
        filename=os.environ.get("MODEL_FILE", "HuatuoGPT-o1-7B-v0.1-Q4_0.gguf"),
    ),
    # llama-cpp-python defaults to a 512-token context window, which is smaller
    # than what the "Max Tokens" slider in this app allows (up to 4096) and
    # leaves no room for multi-turn history. Size the window explicitly;
    # N_CTX lets a deployment trade memory for context length.
    n_ctx=int(os.environ.get("N_CTX", "8192")),
)

# Markdown shown at the top of the page (passed to gr.Markdown below).
DESCRIPTION = (
    "\n"
    "# FreedomIntelligence/HuatuoGPT-o1-7B | Duplicate the space and set it to private for faster & personal inference for free.\n"
    "HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning.\n"
    "It generates a complex thought process, reflecting and refining its reasoning, before providing a final response. \n"
    "\n"
    '**To start a new chat**, click "clear" and start a new dialog.\n'
)

# Markdown shown at the bottom of the page (passed to gr.Markdown below).
LICENSE = "\n--- Apache 2.0 License ---\n"

def user(message, history):
    """Record a submitted user message in the chat history.

    Args:
        message: Text the user submitted.
        history: Existing conversation as a list of ``{"role", "content"}``
            dicts. ``None`` is treated as an empty conversation (the Clear
            button in this app resets the chatbot value to ``None``).

    Returns:
        A ``(textbox_value, history)`` tuple: an empty string (this app routes
        it back to the input textbox, clearing it) and a new list containing
        the previous messages followed by the new user message. The input
        list is not modified.
    """
    previous = history or []
    return "", [*previous, {"role": "user", "content": message}]

def generate_text(history, max_tokens=512, temperature=0.9, top_p=0.95):
    """Stream the model's reply to the most recent message in ``history``.

    Args:
        history: Conversation as a list of dicts with ``"role"`` and
            ``"content"`` keys. The last entry is sent to the model as the
            pending user message. The list is extended in place with the
            assistant's reply.
        max_tokens: Forwarded to ``model.create_chat_completion``.
        temperature: Forwarded to ``model.create_chat_completion``.
        top_p: Forwarded to ``model.create_chat_completion``.

    Yields:
        ``history`` after each streamed chunk, with the final entry being the
        assistant message accumulated so far. If ``history`` is empty it is
        yielded once, unchanged, and the model is not called.
    """
    if not history:
        # No pending message: history[-1] would raise IndexError.
        yield history
        return

    # Forward only role/content so extra keys on the entries never reach the
    # model; the final entry is always sent as a user turn.
    messages = [
        {"role": item["role"], "content": item["content"]}
        for item in history[:-1]
    ]
    messages.append({"role": "user", "content": history[-1]["content"]})

    response = model.create_chat_completion(
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stream=True,
    )
    history.append({"role": "assistant", "content": ""})

    for streamed in response:
        delta = streamed["choices"][0].get("delta", {})
        # A chunk may carry no text: the key can be absent or explicitly None.
        # `or ""` covers both so the concatenation below cannot fail.
        text_chunk = delta.get("content") or ""
        history[-1]["content"] += text_chunk
        yield history

# Assemble the page: description, chat window, input box, sampling controls,
# example prompts and the license footer.
with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)

    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # Generation settings, collapsed by default.
    with gr.Accordion("Adjust Parameters", open=False):
        max_tokens = gr.Slider(
            label="Max Tokens",
            minimum=512,
            maximum=4096,
            value=1024,
            step=1,
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=1.5,
            value=0.9,
            step=0.1,
        )
        top_p = gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
        )

    # On submit: first store the user's message and empty the textbox
    # (unqueued), then stream the model's reply into the chat window.
    msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).then(
        fn=generate_text,
        inputs=[chatbot, max_tokens, temperature, top_p],
        outputs=chatbot,
    )

    # The Clear button resets the chat window by sending it None.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

    # Clickable sample prompts that fill the input textbox.
    gr.Examples(
        label="Examples",
        inputs=msg,
        examples=[
            ["How many r's are in the word strawberry?"],
            ["How to stop a cough?"],
            ["How do I relieve feet pain?"],
        ],
    )

    gr.Markdown(LICENSE)

# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()