Spaces:
Running
on
T4
Running
on
T4
ffreemt
committed on
Commit
·
efc09da
1
Parent(s):
41e9e77
Update model replaced by stats.value.llm
Browse files
app.py
CHANGED
@@ -158,9 +158,9 @@ with gr.Blocks(
|
|
158 |
if not torch.cuda.is_available():
|
159 |
raise gr.Error("GPU not available, cant run. Turn on GPU and restart")
|
160 |
|
161 |
-
model_ = stats.value.llm
|
162 |
config = stats.value.config
|
163 |
-
|
164 |
def bot_stream(chat_history):
|
165 |
try:
|
166 |
message = chat_history[-1][0]
|
@@ -168,7 +168,8 @@ with gr.Blocks(
|
|
168 |
logger.error(f"{chat_history=}: {exc}")
|
169 |
raise gr.Error(f"{chat_history=}")
|
170 |
# yield chat_history
|
171 |
-
for elm in model.chat_stream(tokenizer, message, chat_history):
|
|
|
172 |
chat_history[-1] = [message, elm]
|
173 |
yield chat_history
|
174 |
|
|
|
158 |
if not torch.cuda.is_available():
|
159 |
raise gr.Error("GPU not available, cant run. Turn on GPU and restart")
|
160 |
|
161 |
+
# model_ = stats.value.llm # OOM
|
162 |
config = stats.value.config
|
163 |
+
stats.value.llm.generation_config.update(**asdict(config))
|
164 |
def bot_stream(chat_history):
|
165 |
try:
|
166 |
message = chat_history[-1][0]
|
|
|
168 |
logger.error(f"{chat_history=}: {exc}")
|
169 |
raise gr.Error(f"{chat_history=}")
|
170 |
# yield chat_history
|
171 |
+
# for elm in model.chat_stream(tokenizer, message, chat_history):
|
172 |
+
for elm in stats.value.llm.chat_stream(tokenizer, message, chat_history):
|
173 |
chat_history[-1] = [message, elm]
|
174 |
yield chat_history
|
175 |
|