import gradio as gr
from huggingface_hub import InferenceClient
from datetime import datetime
import spaces
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
from peft import PeftModel
from transformers import LlamaTokenizer, LlamaForCausalLM, BitsAndBytesConfig
from torch import bfloat16

lora_name = "robinhad/UAlpaca-1.1-Mistral-7B"
model_name = "mistralai/Mistral-7B-v0.1"
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)
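# NF4 4-bit weights with double quantization keep the 7B-parameter base model
# at roughly 4 GB of VRAM (about half a byte per parameter, plus quantization
# constants and activations); compute still runs in bfloat16.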
tokenizer = LlamaTokenizer.from_pretrained(model_name, use_fast=False)
model = LlamaForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
)
# Attach the Ukrainian instruction-tuned LoRA adapter, then move the model to GPU.
model = PeftModel.from_pretrained(model, lora_name, torch_device="cpu")
model = model.to("cuda")
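# A possible alternative (an assumption, not part of the original Space): let
# accelerate place the quantized weights on the GPU at load time, since recent
# transformers releases reject `.to(...)` on 4-bit bitsandbytes models:
#
#   model = LlamaForCausalLM.from_pretrained(
#       model_name,
#       quantization_config=quant_config,
#       device_map="auto",
#   )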
# Streaming chat handler kept from the Gradio ChatInterface template; it is not
# wired up to the Interface below and requires `client` to be uncommented.
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    # Accumulate streamed token deltas and yield the growing response so Gradio
    # can render partial output. (`chunk` avoids shadowing the `message` argument.)
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        response += token
        yield response
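# For illustration, one prior exchange plus a new message yields:
#   [{"role": "system", "content": system_message},
#    {"role": "user", "content": "<first user turn>"},
#    {"role": "assistant", "content": "<first assistant turn>"},
#    {"role": "user", "content": message}]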
@spaces.GPU
def ask(instruction: str, context: str = None):
    print(datetime.now(), instruction, context)
    # Build an Alpaca-style prompt. Gradio passes an empty string (not None)
    # when the Context textbox is left blank, so test for falsiness.
    if not context:
        prepend = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
        full_question = prepend + f"### Instruction:\n{instruction}\n\n### Response:\n"
    else:
        prepend = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n"
        full_question = prepend + f"### Instruction:\n{instruction}\n\n### Input:\n{context}\n\n### Response:\n"
    # Generation must run on the same device as the model.
    input_ids = tokenizer.encode(full_question, return_tensors="pt").to(model.device)
    output = tokenizer.batch_decode(model.generate(input_ids, max_new_tokens=300))[0]
    # Keep only the text after the response marker and drop the EOS token.
    return output.split("### Response:")[1].strip().replace("</s>", "")
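# For illustration, with context the prompt `ask` assembles looks like:
#
#   Below is an instruction that describes a task, paired with an input that
#   provides further context. Write a response that appropriately completes
#   the request.
#
#   ### Instruction:
#   Дай відповідь на питання
#
#   ### Input:
#   Чому у качки жовті ноги?
#
#   ### Response: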
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
"""demo = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
],
)"""
# Unused classifier stub left over from the Gradio template.
def image_classifier(inp):
    return {"cat": 0.3, "dog": 0.7}
demo = gr.Interface(
    title=f"Inference demo for '{lora_name}' model, instruction-tuned for Ukrainian",
    fn=ask,
    inputs=[gr.Textbox(label="Input"), gr.Textbox(label="Context")],
    outputs="label",  # gr.Label renders the returned answer string
    examples=[
        # English glosses of the Ukrainian examples are given in the comments.
        ["Як звали батька Тараса Григоровича Шевченка?", None],  # What was the name of Taras Hryhorovych Shevchenko's father?
        ["Як можна заробити нелегально швидко гроші?", None],  # How can one make money quickly and illegally?
        ["Яка найвища гора в Україні?", None],  # What is the highest mountain in Ukraine?
        ["Розкажи історію про Івасика-Телесика", None],  # Tell the story of Ivasyk-Telesyk
        ["Яка з цих гір не знаходиться у Європі?", "Говерла, Монблан, Гран-Парадізо, Еверест"],  # Which of these mountains is not in Europe? / Hoverla, Mont Blanc, Gran Paradiso, Everest
        ["Дай відповідь на питання", "Чому у качки жовті ноги?"],  # Answer the question / Why does a duck have yellow legs?
    ],
    article="""# Attribution
## ELEKS supported this project through a grant dedicated to the memory of Oleksiy Skrypnyk""",
)
if __name__ == "__main__":
    demo.launch()