import spaces
import os
from huggingface_hub import Repository
from huggingface_hub import login
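# Authenticate with the Hub and clone the dataset repo that stores user feedback;
# the clone also provides the backend_fn package imported below.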
login(token=os.environ['HUB_TOKEN'])
repo = Repository(
    local_dir="backend_fn",
    repo_type="dataset",
    clone_from=os.environ['DATASET'],
    token=True,
    git_email='[email protected]'
)
repo.git_pull()
import json
import uuid
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
from backend_fn.feedback import feedback
from gradio_modal import Modal
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
model_name = "Merdeka-LLM/merdeka-llm-3.2b-128k-instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
streamer = TextIteratorStreamer(tokenizer, timeout=300, skip_prompt=True, skip_special_tokens=True)
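# Module-level state captured by the like/feedback handlers below.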
histories = []
action = None
session_id = str(uuid.uuid1())
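# respond() is a generator that streams tokens back to the ChatInterface; the
# @spaces.GPU decorator requests a ZeroGPU slot for the duration of each call.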
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    # system_message,
    max_tokens=4096,
    temperature=0.01,
    top_p=0.95,
):
    messages = [
        {"role": "system", "content": "You are a professional lawyer who is familiar with Malaysia Law."}
    ]
    # Rebuild the conversation from the Gradio history before adding the new user turn.
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generate_kwargs = dict(
        model_inputs,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        streamer=streamer
    )
    # Run generation on a background thread and stream partial output back to the UI.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    for new_token in streamer:
        if new_token != '<':  # ignore bare '<' chunks emitted by the streamer
            response += new_token
            yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
def submit_feedback(value):
    feedback(session_id, json.dumps(histories), value, action)
with gr.Blocks() as demo:
    def vote(history, data: gr.LikeData):
        global histories
        global action
        histories = history
        action = data.liked

    with Modal(visible=False) as modal:
        textb = gr.Textbox(
            label='Actual response',
            info='Leave blank if the answer is good enough'
        )
        submit_btn = gr.Button(
            'Submit'
        )
        submit_btn.click(submit_feedback, textb)
        submit_btn.click(lambda: Modal(visible=False), None, modal)
        submit_btn.click(lambda: gr.update(value=''), [], [textb])
    ci = gr.ChatInterface(
        respond,
        # fill_height=True
        # additional_inputs=[
        #     # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        #     gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        #     gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
        #     gr.Slider(
        #         minimum=0.1,
        #         maximum=1.0,
        #         value=0.95,
        #         step=0.05,
        #         label="Top-p (nucleus sampling)",
        #     ),
        # ],
    )
    ci.chatbot.show_copy_button = True
    # ci.chatbot.value=[(None,"Hello! I'm here to assist you with understanding the laws and acts of Malaysia.")]
    # ci.chatbot.height=500
    ci.chatbot.like(vote, ci.chatbot, None).then(
        lambda: Modal(visible=True), None, modal
    )
if __name__ == "__main__":
    demo.launch()