Guchyos committed · Commit 4258c2d · verified · 1 Parent(s): a5ba0e4

Update app.py

Files changed (1):
  app.py  +23 −41
app.py CHANGED
@@ -1,36 +1,34 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-import os
-from huggingface_hub import login
 
-token = os.environ.get("HUGGINGFACE_TOKEN")
-if token:
-    login(token=token)
+model = None
+tokenizer = None
+
+def load_model():
+    global model, tokenizer
+    if model is None:
+        model_name = "Guchyos/gemma-2b-elyza-task"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float32,  # use float32
+            device_map="cpu"
+        )
+    return model, tokenizer
 
 def predict(message, history):
     try:
-        model_name = "Guchyos/gemma-2b-elyza-task"
-        tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
-
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            device_map="cpu",
-            load_in_8bit=False,  # disable 8-bit quantization
-            load_in_4bit=False,  # disable 4-bit quantization
-            use_auth_token=True
-        )
-
+        model, tokenizer = load_model()
         prompt = f"質問: {message}\n\n回答:"
-        inputs = tokenizer(prompt, return_tensors="pt")  # dropped .to(device)
+        inputs = tokenizer(prompt, return_tensors="pt")
 
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=256,  # fewer tokens to keep generation light
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True
-        )
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=128,
+                do_sample=False
+            )
 
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response.replace(prompt, "").strip()
@@ -40,25 +38,9 @@ def predict(message, history):
 
 demo = gr.ChatInterface(
     fn=predict,
-    title="💬 Gemma 2 Quantized for ELYZA-tasks",
-    description="""
-    # ELYZA-tasks-100-TV用に最適化された日本語LLMです
-
-    ## 使い方
-    - 質問を入力してEnterキーを押してください
-    - 生成には数秒かかります
-
-    ## 特徴
-    - CPU対応
-    - 日本語に特化
-    - ELYZA-tasks形式に対応
-    """,
-    examples=[
-        "日本の四季について、それぞれの特徴を説明してください。",
-        "人工知能の発展における倫理的な課題について説明してください。",
-        "東京の主要な観光スポットを3つ挙げて、それぞれ説明してください。"
-    ]
+    title="💬 Gemma 2 for ELYZA-tasks",
+    description="ELYZA-tasks-100-TV用に最適化された日本語LLMです"
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
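
For quick local verification, a minimal smoke test of the updated handler (a sketch, not part of this commit) could look like the following. It assumes the post-commit app.py is on the import path and that the Guchyos/gemma-2b-elyza-task checkpoint downloads without authentication; importing app builds the ChatInterface but does not launch it, since demo.launch(share=True) sits behind the __main__ guard.

# Sketch: smoke-test predict() without starting the Gradio server.
# Assumes the post-commit app.py is importable and the checkpoint is public.
from app import predict

# predict() ignores its history argument, so an empty list suffices.
print(predict("日本の四季について教えてください。", []))  # "Tell me about Japan's four seasons."

# A second call should skip model loading: load_model() caches the model
# and tokenizer in module-level globals after the first call.
print(predict("人工知能とは何ですか?", []))  # "What is artificial intelligence?"

The second call doubles as a cheap check on the lazy-loading path: only the first request pays the download and float32 load cost.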