Chasottco committed on
Commit
d1ceeee
1 Parent(s): 99a85f8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -15
README.md CHANGED
@@ -23,8 +23,9 @@ This llama model was trained 2x faster with [Unsloth](https://github.com/unsloth
23
 
24
  ---
25
 
26
- #Google Colabでの動作を想定
27
 
 
28
  # 必要なライブラリをインストール
29
  %%capture
30
  !pip install unsloth
@@ -59,7 +60,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(
59
  )
60
 
61
  # 元のモデルにLoRAのアダプタを統合
62
- model = PeftModel.from_pretrained(model, adapter_id, token = HF_TOKEN)
63
 
64
  # google drive mount(事前にデータをアップロード)
65
  from google.colab import drive
@@ -70,26 +71,27 @@ datasets = []
70
  with open("/content/drive/MyDrive/2024松尾研LLM/elyza-tasks-100-TV_0.jsonl", "r") as f:
71
  item = ""
72
  for line in f:
73
- line = line.strip()
74
- item += line
75
- if item.endswith("}"):
76
- datasets.append(json.loads(item))
77
- item = ""
 
78
 
79
  # モデルを用いてタスクの推論
80
  FastLanguageModel.for_inference(model)
81
 
82
  results = []
83
  for dt in tqdm(datasets):
84
- input = dt["input"]
 
 
85
 
86
- prompt = f"""### 指示\n{input}\n### 回答\n"""
87
 
88
- inputs = tokenizer([prompt], return_tensors = "pt").to(model.device)
 
89
 
90
- outputs = model.generate(**inputs, max_new_tokens = 512, use_cache = True, do_sample=False, repetition_penalty=1.2)
91
- prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
92
 
93
- results.append({"task_id": dt["task_id"], "input": input, "output": prediction})
94
-
95
- ---
 
23
 
24
  ---
25
 
26
+ # Google Colabでの動作を想定
27
 
28
+ ```python
29
  # 必要なライブラリをインストール
30
  %%capture
31
  !pip install unsloth
 
60
  )
61
 
62
  # 元のモデルにLoRAのアダプタを統合
63
+ model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)
64
 
65
  # google drive mount(事前にデータをアップロード)
66
  from google.colab import drive
 
71
  with open("/content/drive/MyDrive/2024松尾研LLM/elyza-tasks-100-TV_0.jsonl", "r") as f:
72
  item = ""
73
  for line in f:
74
+ line = line.strip()
75
+ item += line
76
+ if item.endswith("}"):
77
+ datasets.append(json.loads(item))
78
+ item = ""
79
+
80
 
81
  # モデルを用いてタスクの推論
82
  FastLanguageModel.for_inference(model)
83
 
84
  results = []
85
  for dt in tqdm(datasets):
86
+ input = dt["input"]
87
+
88
+ prompt = f"""### 指示\n{input}\n### 回答\n"""
89
 
90
+ inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
91
 
92
+ outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True, do_sample=False, repetition_penalty=1.2)
93
+ prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
94
 
95
+ results.append({"task_id": dt["task_id"], "input": input, "output": prediction})
 
96
 
97
+ ---