Spaces:
Sleeping
Sleeping
Tuchuanhuhuhu
committed on
Commit
·
30f7268
1
Parent(s):
88399f9
bugfix: models non operational on CPU
Browse files
- assets/custom.css +6 -6
- modules/models.py +10 -11
assets/custom.css
CHANGED
@@ -8,7 +8,7 @@
|
|
8 |
font-size: var(--text-xxl);
|
9 |
line-height: 1.3;
|
10 |
text-align: left;
|
11 |
-
margin-top: 6px;
|
12 |
white-space: nowrap;
|
13 |
}
|
14 |
#description {
|
@@ -17,9 +17,9 @@
|
|
17 |
}
|
18 |
|
19 |
/* 覆盖gradio的页脚信息QAQ */
|
20 |
-
footer {
|
21 |
display: none !important;
|
22 |
-
}
|
23 |
#footer {
|
24 |
text-align: center;
|
25 |
}
|
@@ -35,7 +35,7 @@ footer {
|
|
35 |
position: absolute;
|
36 |
max-height: 30px;
|
37 |
}
|
38 |
-
/* user_info */
|
39 |
#user_info {
|
40 |
white-space: nowrap;
|
41 |
position: absolute; left: 8em; top: .2em;
|
@@ -132,7 +132,7 @@ footer {
|
|
132 |
border-radius: 12px;
|
133 |
}
|
134 |
.apSwitch input {
|
135 |
-
display: none !important;
|
136 |
}
|
137 |
.apSlider {
|
138 |
background-color: var(--block-label-background-fill);
|
@@ -146,7 +146,7 @@ footer {
|
|
146 |
font-size: 18px;
|
147 |
border-radius: 12px;
|
148 |
}
|
149 |
-
.apSlider::before {
|
150 |
bottom: -1.5px;
|
151 |
left: 1px;
|
152 |
position: absolute;
|
|
|
8 |
font-size: var(--text-xxl);
|
9 |
line-height: 1.3;
|
10 |
text-align: left;
|
11 |
+
margin-top: 6px;
|
12 |
white-space: nowrap;
|
13 |
}
|
14 |
#description {
|
|
|
17 |
}
|
18 |
|
19 |
/* 覆盖gradio的页脚信息QAQ */
|
20 |
+
/* footer {
|
21 |
display: none !important;
|
22 |
+
} */
|
23 |
#footer {
|
24 |
text-align: center;
|
25 |
}
|
|
|
35 |
position: absolute;
|
36 |
max-height: 30px;
|
37 |
}
|
38 |
+
/* user_info */
|
39 |
#user_info {
|
40 |
white-space: nowrap;
|
41 |
position: absolute; left: 8em; top: .2em;
|
|
|
132 |
border-radius: 12px;
|
133 |
}
|
134 |
.apSwitch input {
|
135 |
+
display: none !important;
|
136 |
}
|
137 |
.apSlider {
|
138 |
background-color: var(--block-label-background-fill);
|
|
|
146 |
font-size: 18px;
|
147 |
border-radius: 12px;
|
148 |
}
|
149 |
+
.apSlider::before {
|
150 |
bottom: -1.5px;
|
151 |
left: 1px;
|
152 |
position: absolute;
|
modules/models.py
CHANGED
@@ -235,25 +235,21 @@ class ChatGLM_Client(BaseLLMModel):
|
|
235 |
quantified = False
|
236 |
if "int4" in model_name:
|
237 |
quantified = True
|
238 |
-
|
239 |
-
model = AutoModel.from_pretrained(
|
240 |
model_source, trust_remote_code=True
|
241 |
-
)
|
242 |
-
else:
|
243 |
-
model = AutoModel.from_pretrained(
|
244 |
-
model_source, trust_remote_code=True
|
245 |
-
).half()
|
246 |
if torch.cuda.is_available():
|
247 |
# run on CUDA
|
248 |
logging.info("CUDA is available, using CUDA")
|
249 |
-
model = model.cuda()
|
250 |
# mps加速还存在一些问题,暂时不使用
|
251 |
elif system_name == "Darwin" and model_path is not None and not quantified:
|
252 |
logging.info("Running on macOS, using MPS")
|
253 |
# running on macOS and model already downloaded
|
254 |
-
model = model.to("mps")
|
255 |
else:
|
256 |
logging.info("GPU is not available, using CPU")
|
|
|
257 |
model = model.eval()
|
258 |
CHATGLM_MODEL = model
|
259 |
|
@@ -483,8 +479,11 @@ class XMBot_Client(BaseLLMModel):
|
|
483 |
"data": question
|
484 |
}
|
485 |
response = requests.post(self.url, json=data)
|
486 |
-
|
487 |
-
|
|
|
|
|
|
|
488 |
|
489 |
|
490 |
|
|
|
235 |
quantified = False
|
236 |
if "int4" in model_name:
|
237 |
quantified = True
|
238 |
+
model = AutoModel.from_pretrained(
|
|
|
239 |
model_source, trust_remote_code=True
|
240 |
+
)
|
|
|
|
|
|
|
|
|
241 |
if torch.cuda.is_available():
|
242 |
# run on CUDA
|
243 |
logging.info("CUDA is available, using CUDA")
|
244 |
+
model = model.half().cuda()
|
245 |
# mps加速还存在一些问题,暂时不使用
|
246 |
elif system_name == "Darwin" and model_path is not None and not quantified:
|
247 |
logging.info("Running on macOS, using MPS")
|
248 |
# running on macOS and model already downloaded
|
249 |
+
model = model.half().to("mps")
|
250 |
else:
|
251 |
logging.info("GPU is not available, using CPU")
|
252 |
+
model = model.float()
|
253 |
model = model.eval()
|
254 |
CHATGLM_MODEL = model
|
255 |
|
|
|
479 |
"data": question
|
480 |
}
|
481 |
response = requests.post(self.url, json=data)
|
482 |
+
try:
|
483 |
+
response = json.loads(response.text)
|
484 |
+
return response["data"], len(response["data"])
|
485 |
+
except Exception as e:
|
486 |
+
return response.text, len(response.text)
|
487 |
|
488 |
|
489 |
|