hendrydong committed
Commit f075841 · 1 Parent(s): 67d7a5a

Update app.py

Files changed (1):
  app.py +22 -18
app.py CHANGED
@@ -5,13 +5,16 @@
 """
 import logging
 import json
+import os
 import sys
+sys.path.remove(os.path.abspath(os.path.dirname(sys.argv[0])))
+import torch
 import warnings
 import gradio as gr
 from dataclasses import dataclass, field
 from transformers import HfArgumentParser
 from typing import Optional
-import torch
+
 from lmflow.datasets.dataset import Dataset
 from lmflow.pipeline.auto_pipeline import AutoPipeline
 from lmflow.models.auto_model import AutoModel
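Note: the new sys.path.remove(os.path.abspath(os.path.dirname(sys.argv[0]))) line drops the script's own directory from Python's module search path, so that `import lmflow` resolves to the installed package rather than a same-named directory sitting next to app.py. A minimal sketch of the same idea (the shadowing local lmflow/ directory is an assumption, not shown in this diff):

    import os
    import sys

    # Python normally puts the script's directory first on sys.path, so a
    # local package (e.g. a checked-out lmflow/ folder) would shadow the
    # installed one.
    script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
    if script_dir in sys.path:  # guard: a bare remove() raises ValueError if absent
        sys.path.remove(script_dir)

    import lmflow  # now resolved from site-packages, not the local tree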
@@ -70,13 +73,13 @@ css = """
 @dataclass
 class ChatbotArguments:
     prompt_structure: Optional[str] = field(
-        default="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: {input_text}###Assistant:",
+        default="{input_text}",
         metadata={
             "help": "prompt structure given user's input text"
         },
     )
     end_string: Optional[str] = field(
-        default="#",
+        default="\n\n",
         metadata={
             "help": "end string mark of the chatbot's output"
         },
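Note: the defaults now pass the user's text through verbatim and stop at a blank line, replacing the Vicuna-style ###Human:/###Assistant: template and the single-# end mark. A minimal sketch of how these two fields plausibly interact (the split-based truncation is an assumption about what stream_inference does with end_string):

    prompt_structure = "{input_text}"
    end_string = "\n\n"

    # Build the prompt for one turn.
    context = prompt_structure.format(input_text="What is LMFlow?")

    # One plausible way an end marker is applied to raw model output:
    raw_output = "LMFlow is a toolkit for finetuning LLMs.\n\nWhat is ..."
    reply = raw_output.split(end_string)[0]
    print(reply)  # "LMFlow is a toolkit for finetuning LLMs."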
@@ -94,7 +97,6 @@ class ChatbotArguments:
         },
     )
 
-
 def main():
     pipeline_name = "inferencer"
     PipelineArguments = AutoArguments.get_pipeline_args_class(pipeline_name)
@@ -111,6 +113,7 @@ def main():
     pipeline_args.deepspeed = "configs/ds_config_chatbot.json"
     model_args.torch_dtype = "float16"
 
+
     with open (pipeline_args.deepspeed, "r") as f:
         ds_config = json.load(f)
 
@@ -119,6 +122,7 @@ def main():
         tune_strategy='none',
         ds_config=ds_config,
         device=pipeline_args.device,
+        torch_dtype=torch.float16
     )
 
     # We don't need input data, we will read interactively from stdin
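Note: passing torch_dtype=torch.float16 into the model constructor loads the weights in half precision, roughly halving GPU memory versus float32 (consistent with model_args.torch_dtype = "float16" above). A back-of-the-envelope check in plain PyTorch:

    import torch

    n_params = 7_000_000_000  # e.g. a 7B model, for illustration
    gib = 2 ** 30
    bytes_fp32 = n_params * torch.finfo(torch.float32).bits // 8
    bytes_fp16 = n_params * torch.finfo(torch.float16).bits // 8
    print(f"{bytes_fp32 / gib:.1f} GiB fp32 vs {bytes_fp16 / gib:.1f} GiB fp16")
    # ~26.1 GiB vs ~13.0 GiB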
@@ -150,19 +154,28 @@ def main():
 
     token_per_step = 4
 
+    def hist2context(hist):
+        context = ""
+        for query, response in hist:
+            context += prompt_structure.format(input_text=query)
+            if not (response is None):
+                context += response
+        return context
 
-    def chat_stream( context, query: str, history= None, **kwargs):
+    def chat_stream(query: str, history= None, **kwargs):
         if history is None:
             history = []
 
+        context = hist2context(history)
         print_index = 0
         context += prompt_structure.format(input_text=query)
-        context = context[-model.get_max_length():]
+        context_ = context[-model.get_max_length():]
         input_dataset = dataset.from_dict({
             "type": "text_only",
-            "instances": [ { "text": context } ]
+            "instances": [ { "text": context_ } ]
         })
-        for response, flag_break in inferencer.stream_inference(context=context, model=model, max_new_tokens=chatbot_args.max_new_tokens,
+        print(context_)
+        for response, flag_break in inferencer.stream_inference(context=context_, model=model, max_new_tokens=chatbot_args.max_new_tokens,
                 token_per_step=token_per_step, temperature=chatbot_args.temperature,
                 end_string=end_string, input_dataset=input_dataset):
             delta = response[print_index:]
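Note: chat_stream no longer threads a mutable context argument through calls; each turn rebuilds the context from the (query, response) history and keeps only the last model.get_max_length() characters. Since get_max_length() is presumably a token limit while the slice is character-based, this is a coarse sliding window rather than exact truncation. A standalone sketch of the rebuild-and-truncate step (max_length stands in for model.get_max_length()):

    prompt_structure = "{input_text}"

    def hist2context(hist):
        # Replay every past (query, response) pair into one flat string.
        context = ""
        for query, response in hist:
            context += prompt_structure.format(input_text=query)
            if response is not None:
                context += response
        return context

    history = [("Hi.", "Hello!"), ("Name a prime.", "2")]
    max_length = 16  # stand-in for model.get_max_length()
    context = hist2context(history) + prompt_structure.format(input_text="Why?")
    context_ = context[-max_length:]  # keep only the most recent characters
    print(context_)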
@@ -171,22 +184,15 @@ def main():
 
             yield delta, history + [(query, seq)]
             if flag_break:
-                context += response + "\n"
                 break
 
 
 
 
     def predict(input, history=None):
-        try:
-            global context
-            context = ""
-        except SyntaxError:
-            pass
-
         if history is None:
             history = []
-        for response, history in chat_stream(context, input, history):
+        for response, history in chat_stream(input, history):
             updates = []
             for query, response in history:
                 updates.append(gr.update(visible=True, value="" + query))
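Note: with the global context removed, predict is a plain consumer of the chat_stream generator: each yield carries the newest text delta plus the updated history, which is re-rendered into the chat boxes. A minimal sketch of that producer/consumer shape (Gradio specifics omitted; the token list stands in for stream_inference):

    def chat_stream(query, history):
        partial = ""
        for token in ["Hello", ", ", "world"]:  # stand-in for streamed output
            partial += token
            yield token, history + [(query, partial)]

    def predict(user_input, history=None):
        history = history or []
        for _delta, history in chat_stream(user_input, history):
            print(history[-1][1])  # the app emits gr.update(...) per turn

    predict("greet me")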
@@ -201,7 +207,6 @@ def main():
 
     with gr.Blocks(css=css) as demo:
         gr.HTML(title)
-        gr.HTML('''<center><a href="https://huggingface.co/spaces/OptimalScale/Robin-7b?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg" alt="Duplicate Space"></a></center>''')
         state = gr.State([])
         text_boxes = []
         for i in range(MAX_BOXES):
@@ -221,6 +226,5 @@
 
 
 
-
 if __name__ == "__main__":
     main()