ffreemt committed on
Commit
1a8bf1d
β€’
1 Parent(s): f820383

Update cuda().half(), fix timezone

Browse files
Files changed (1) hide show
  1. app.py +18 -8
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  # pylint: disable=broad-exception-caught, redefined-outer-name, missing-function-docstring, missing-module-docstring, too-many-arguments, line-too-long, invalid-name, redefined-builtin, redefined-argument-from-local
2
  # import gradio as gr
3
 
@@ -6,17 +9,24 @@
6
 
7
  # %%writefile demo-4bit.py
8
 
 
 
9
  from textwrap import dedent
10
 
11
  import gradio as gr
12
  import mdtex2html
13
  import torch
14
  from loguru import logger
15
-
16
- # credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
17
- # while mistakes are mine
18
  from transformers import AutoModel, AutoTokenizer
19
 
 
 
 
 
 
 
 
 
20
  model_name = "THUDM/chatglm2-6b"
21
  # model_name = "THUDM/chatglm2-6b-int4"
22
 
@@ -33,9 +43,9 @@ has_cuda = torch.cuda.is_available()
33
  # has_cuda = False # force cpu
34
 
35
  if has_cuda:
36
- model = AutoModel.from_pretrained(
37
- model_name, trust_remote_code=True
38
- ).cuda().half() # 3.92G
39
  else:
40
  model = AutoModel.from_pretrained(
41
  model_name, trust_remote_code=True
@@ -179,7 +189,7 @@ def retry_last_answer(
179
  history.pop(-1)
180
 
181
  yield from predict(
182
- RETRY_FLAG,
183
  user_input,
184
  chatbot,
185
  max_length,
@@ -196,7 +206,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm")) a
196
  """<center><a href="https://huggingface.co/spaces/mikeee/chatglm2-6b-4bit?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>To avoid the queue and for faster inference Duplicate this Space and upgrade to GPU</center>"""
197
  )
198
 
199
- with gr.Accordion("Info", open=False):
200
  _ = """
201
  ## ChatGLM2-6B-int4
202
 
 
1
+ """Credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
2
+ while mistakes are mine
3
+ """
4
  # pylint: disable=broad-exception-caught, redefined-outer-name, missing-function-docstring, missing-module-docstring, too-many-arguments, line-too-long, invalid-name, redefined-builtin, redefined-argument-from-local
5
  # import gradio as gr
6
 
 
9
 
10
  # %%writefile demo-4bit.py
11
 
12
+ import os
13
+ import time
14
  from textwrap import dedent
15
 
16
  import gradio as gr
17
  import mdtex2html
18
  import torch
19
  from loguru import logger
 
 
 
20
  from transformers import AutoModel, AutoTokenizer
21
 
22
+ # fix timezone in Linux
23
+ os.environ["TZ"] = "Asia/Shanghai"
24
+ try:
25
+ time.tzset() # type: ignore # pylint: disable=no-member
26
+ except Exception:
27
+ # Windows
28
+ logger.warning("Windows, cant run time.tzset()")
29
+
30
  model_name = "THUDM/chatglm2-6b"
31
  # model_name = "THUDM/chatglm2-6b-int4"
32
 
 
43
  # has_cuda = False # force cpu
44
 
45
  if has_cuda:
46
+ model = (
47
+ AutoModel.from_pretrained(model_name, trust_remote_code=True).cuda().half()
48
+ ) # 3.92G
49
  else:
50
  model = AutoModel.from_pretrained(
51
  model_name, trust_remote_code=True
 
189
  history.pop(-1)
190
 
191
  yield from predict(
192
+ RETRY_FLAG, # type: ignore
193
  user_input,
194
  chatbot,
195
  max_length,
 
206
  """<center><a href="https://huggingface.co/spaces/mikeee/chatglm2-6b-4bit?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>To avoid the queue and for faster inference Duplicate this Space and upgrade to GPU</center>"""
207
  )
208
 
209
+ with gr.Accordion("🎈 Info", open=False):
210
  _ = """
211
  ## ChatGLM2-6B-int4
212