import os

import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
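
# Minimal usage sketch (illustrative, not part of the app): a single
# non-streaming `chat_completion` call returns the whole reply at once,
# unlike the streaming loop in `respond` below. Uncomment to try:
#
#   reply = client.chat_completion(
#       [{"role": "user", "content": "Hello!"}],
#       max_tokens=32,
#   )
#   print(reply.choices[0].message.content)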


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # Iterate over streamed chunks; the loop variable is named `chunk` to
    # avoid shadowing the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Streamed deltas can arrive with `content=None` (e.g. the final
        # chunk), so guard before concatenating.
        token = chunk.choices[0].delta.content

        if token:
            response += token
        yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


# Note: `demo.launch()` blocks, so the fine-tuned model below is loaded first
# and the launch call is moved to the end of the file.

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load Hugging Face API token securely
api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

if not api_token:
    raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")

# Define model names
base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
peft_model_name = "Hrushi02/Root_Math"

# Load base model with authentication
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    token=api_token,  # `use_auth_token` is deprecated; `token` is the current keyword
)


# Load fine-tuned model
model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
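
# Hedged usage sketch (an illustrative addition, not part of the original app):
# run one sample prompt through the fine-tuned model as a quick sanity check.
# The prompt and generation settings are assumptions; remove this block if the
# extra startup cost is unwanted.
prompt = "Solve for x: 2x + 3 = 11"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))


# Launch the Gradio app last: `demo.launch()` blocks until the server stops
# (this guard was moved here from above so the model loading runs first).
if __name__ == "__main__":
    demo.launch()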