Spaces:

m96tkmok
/

Llama3.2_Taiwan_Chatbot_v01

Sleeping

App Files Files Community

m96tkmok committed on Dec 6, 2024

Commit

17bf3db

verified ·

1 Parent(s): 40c1dbc

Create app.py

Browse files

Files changed (1) hide show

app.py +94 -0

app.py ADDED Viewed

	@@ -0,0 +1,94 @@

+from threading import Thread
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+TITLE = "<h1><center>Chat with lianghsun/Llama-3.2-Taiwan-3B</center></h1>"
+DESCRIPTION = "<h3><center>Visit <a href='https://huggingface.co/lianghsun/Llama-3.2-Taiwan-3B' target='_blank'> the model page</a> for details.</center></h3>"
+DEFAULT_SYSTEM = "你是一個產自台灣的聊天機械人, 你以台灣本地人的身份, 使用正體中文回答問題."
+CSS = """
+.duplicate-button {
+  margin: auto !important;
+  color: white !important;
+  background: green !important;
+  border-radius: 100vh !important;
+}
+"""
+tokenizer = AutoTokenizer.from_pretrained("lianghsun/Llama-3.2-Taiwan-3B")
+model = AutoModelForCausalLM.from_pretrained("lianghsun/Llama-3.2-Taiwan-3B", torch_dtype="auto", device_map="auto")
+def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
+    conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
+    for prompt, answer in history:
+        conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
+    conversation.append({"role": "user", "content": message})
+    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(
+        model.device
+    )
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        input_ids=input_ids,
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        do_sample=True,
+    )
+    if temperature == 0:
+        generate_kwargs["do_sample"] = False
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+    output = ""
+    for new_token in streamer:
+        output += new_token
+        yield output
+chatbot = gr.Chatbot(height=450)
+with gr.Blocks(css=CSS) as demo:
+    gr.HTML(TITLE)
+    gr.HTML(DESCRIPTION)
+    gr.ChatInterface(
+        fn=stream_chat,
+        chatbot=chatbot,
+        fill_height=True,
+        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+        additional_inputs=[
+            gr.Text(
+                value="",
+                label="System",
+                render=False,
+            ),
+            gr.Slider(
+                minimum=0,
+                maximum=1,
+                step=0.1,
+                value=0.8,
+                label="Temperature",
+                render=False,
+            ),
+            gr.Slider(
+                minimum=128,
+                maximum=4096,
+                step=1,
+                value=1024,
+                label="Max new tokens",
+                render=False,
+            ),
+        ],
+    )
+if __name__ == "__main__":
+    demo.launch()