Spaces:
Running
on
T4
Running
on
T4
ffreemt
committed on
Commit
·
efc09da
1
Parent(s):
41e9e77
Update model replaced by stats.value.llm
Browse files
app.py
CHANGED
@@ -158,9 +158,9 @@ with gr.Blocks(
|
|
158 |
if not torch.cuda.is_available():
|
159 |
raise gr.Error("GPU not available, cant run. Turn on GPU and restart")
|
160 |
|
161 |
-
model_ = stats.value.llm
|
162 |
config = stats.value.config
|
163 |
-
|
164 |
def bot_stream(chat_history):
|
165 |
try:
|
166 |
message = chat_history[-1][0]
|
@@ -168,7 +168,8 @@ with gr.Blocks(
|
|
168 |
logger.error(f"{chat_history=}: {exc}")
|
169 |
raise gr.Error(f"{chat_history=}")
|
170 |
# yield chat_history
|
171 |
-
for elm in model.chat_stream(tokenizer, message, chat_history):
|
|
|
172 |
chat_history[-1] = [message, elm]
|
173 |
yield chat_history
|
174 |
|
|
|
158 |
if not torch.cuda.is_available():
|
159 |
raise gr.Error("GPU not available, cant run. Turn on GPU and restart")
|
160 |
|
161 |
+
# model_ = stats.value.llm # OOM
|
162 |
config = stats.value.config
|
163 |
+
stats.value.llm.generation_config.update(**asdict(config))
|
164 |
def bot_stream(chat_history):
|
165 |
try:
|
166 |
message = chat_history[-1][0]
|
|
|
168 |
logger.error(f"{chat_history=}: {exc}")
|
169 |
raise gr.Error(f"{chat_history=}")
|
170 |
# yield chat_history
|
171 |
+
# for elm in model.chat_stream(tokenizer, message, chat_history):
|
172 |
+
for elm in stats.value.llm.chat_stream(tokenizer, message, chat_history):
|
173 |
chat_history[-1] = [message, elm]
|
174 |
yield chat_history
|
175 |
|