sca255 committed on
Commit
b80e447
·
verified ·
1 Parent(s): e4716cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -19
app.py CHANGED
@@ -1,10 +1,18 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
9
 
10
  def respond(
@@ -24,29 +32,53 @@ def respond(
24
  messages.append({"role": "assistant", "content": val[1]})
25
 
26
  messages.append({"role": "user", "content": message})
27
-
 
28
  response = ""
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
 
 
 
 
 
 
 
 
 
 
41
 
42
  """
43
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
  """
45
- demo = gr.ChatInterface(
 
 
 
 
 
 
 
 
 
 
 
46
  respond,
47
  additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
  gr.Slider(
52
  minimum=0.1,
@@ -56,9 +88,8 @@ demo = gr.ChatInterface(
56
  label="Top-p (nucleus sampling)",
57
  ),
58
  ],
59
- theme=gr.themes.dark
60
  )
61
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
import gradio as gr
import hf_transfer  # imported for its side availability; accelerates Hub downloads when HF_HUB_ENABLE_HF_TRANSFER is set
from threading import Thread

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteriaList,
    TextIteratorStreamer,
)

# NOTE(review): the model and tokenizer come from two different repos
# ("mistralai/Mistral-7B-v0.1" vs "kubernetes-bad/chargen-v2") — confirm their
# vocabularies are compatible, otherwise decoded output will be garbage.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
)
tknz = AutoTokenizer.from_pretrained("kubernetes-bad/chargen-v2")
# BUG FIX: the rest of the file refers to the tokenizer as `tokenizer`, but
# only `tknz` was defined (NameError at first request). Keep the original
# name and alias it so both resolve to the same object.
tokenizer = tknz

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
 
16
 
17
 
18
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream an assistant reply for *message* given the gradio chat *history*.

    Yields the accumulated response string token-by-token, which is the
    contract `gr.ChatInterface` expects from a streaming callback.

    NOTE(review): the diff hunk hides the original signature and the start of
    the history loop; this reconstruction follows the standard gradio
    ChatInterface template this file was derived from — confirm parameters.
    """
    # Rebuild the OpenAI-style message list from gradio's (user, bot) pairs.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    # BUG FIX: the original called `tokenizer.build_chat_input(...)` and
    # `tokenizer.get_command(...)` — ChatGLM-specific APIs — and referenced
    # the undefined names `tokenizer` and `stop` (NameError at runtime).
    # Use the portable chat-template API on the tokenizer that IS defined.
    model_inputs = tknz.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(next(model.parameters()).device)

    streamer = TextIteratorStreamer(tknz, timeout=600, skip_prompt=True)
    generate_kwargs = {
        "input_ids": model_inputs,
        "streamer": streamer,
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "top_p": top_p,
        "temperature": temperature,
        "repetition_penalty": 1,
        "eos_token_id": tknz.eos_token_id,
    }
    # BUG FIX: the original built the Thread but never started it, so the
    # streamer produced nothing and blocked for its full 600 s timeout.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    response = ""
    for new_token in streamer:
        # Trim anything after a user-turn marker that leaks into the stream.
        if new_token and "<|user|>" in new_token:
            new_token = new_token.split("<|user|>")[0]
        if new_token:
            # BUG FIX: the original mutated `history[-1][1]` and yielded
            # `history`, but ChatInterface passes history as tuples
            # (TypeError) and expects the reply string — yield the
            # accumulated response instead, as the previous revision did.
            response += new_token
            yield response
62
 
63
  """
64
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
65
  """
66
+ js_func = """
67
+ function refresh() {
68
+ const url = new URL(window.location);
69
+
70
+ if (url.searchParams.get('__theme') !== 'dark') {
71
+ url.searchParams.set('__theme', 'dark');
72
+ window.location.href = url.href;
73
+ }
74
+ }
75
+ """
76
+ app = gr.ChatInterface(
77
+ js=js_func
78
  respond,
79
  additional_inputs=[
80
+ gr.Textbox(value="You are a bot who generates perfect roleplaying charecters.", label="System message"),
81
+ gr.Slider(minimum=1, maximum=, value=512, step=1, label="Max new tokens"),
82
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
83
  gr.Slider(
84
  minimum=0.1,
 
88
  label="Top-p (nucleus sampling)",
89
  ),
90
  ],
 
91
  )
92
 
93
 
94
  if __name__ == "__main__":
95
+ app.launch()