import os

import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Build the conversation in the OpenAI-style messages format.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Stream the completion and yield the growing response for live updates.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final streamed chunk may carry no content
            response += token
        yield response
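
# Illustrative usage (not part of the original app): since `respond` is a generator,
# it can also be consumed directly outside Gradio, e.g.
#   for partial in respond("Hello!", [], "You are a friendly Chatbot.", 256, 0.7, 0.95):
#       print(partial)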
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()

import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
# Load Hugging Face API token securely
api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not api_token:
    raise ValueError("ERROR: Hugging Face API token is not set. Please set it as an environment variable.")
# Define model names
base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
peft_model_name = "Hrushi02/Root_Math"
# Load base model with authentication
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    token=api_token,  # `token` is the current argument; `use_auth_token` is deprecated
)
# Load fine-tuned model
model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
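
# Minimal inference sketch (not part of the original script): shows how the loaded PEFT
# model and tokenizer could be used to generate an answer. The prompt text and the
# generation settings (max_new_tokens, do_sample, temperature) are illustrative assumptions.
prompt = "Solve for x: 2x + 3 = 11"
inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
output_ids = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.7)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))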