---
base_model:
- google/gemma-2-27b
tags:
- text-generation-inference
- transformers
- unsloth
- gemma2
- trl
license: gemma
language:
- ja
---

# Uploaded model

- **Developed by:** hama-jp
- **License:** Gemma Terms of Use
- **Finetuned from model:** google/gemma-2-27b (improved using Qwen)

This gemma2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.

# How to generate output.jsonl

Install the dependencies:

```python
%%capture
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

# Install Flash Attention 2 for softcapping support
# (requires a GPU with compute capability >= 8, e.g. Ampere or newer)
import torch
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"
```

Load the model and tokenizer:

```python
from unsloth import FastLanguageModel
import torch
import json

max_seq_length = 4096
dtype = None          # None auto-detects (bfloat16 on Ampere+, float16 otherwise)
load_in_4bit = True   # 4-bit quantization to fit the 27B model in memory

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "hama-jp/gemma2-27b-sft-241213-lora-06",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
```

Load the ELYZA-tasks-100-TV test set and build the prompts:

```python
#@title Load ELYZA-tasks-100-TV
import json

# Path to the test file
file_path = 'elyza-tasks-100-TV_0.jsonl'

# Dictionary holding the test dataset
dataset_test = {}

# Read the JSONL file line by line
with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
        # Parse each line as JSON
        task_data = json.loads(line.strip())

        # Extract task_id and input
        task_id = task_data.get("task_id")
        input_data = task_data.get("input")

        # Store the input keyed by task_id
        if task_id is not None:
            dataset_test[task_id] = {"input": input_data}

# Prompt template (instruction / input / output, in Japanese)
alpaca_prompt = """### 指示
以下の入力に従って適切に処理してください。

### 入力:
{}

### 出力:
"""

# Add a "text" key holding the formatted prompt for each task.
# Note: the EOS token is NOT appended here -- these prompts are used
# for inference, and an EOS token in the prompt would signal the model
# that the sequence is already finished.
for task_id, content in dataset_test.items():
    input_text = content["input"]
    prompt_text = alpaca_prompt.format(input_text)
    dataset_test[task_id]["text"] = prompt_text
```

Run inference and write output.jsonl:

```python
from unsloth import FastLanguageModel

FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

def extract_response(full_text):
    """
    Extract the response part that follows the '### 出力:' marker,
    removing any surrounding whitespace. If the marker is absent,
    return the whole text stripped.
    """
    response_marker = "\n### 出力:"
    if response_marker in full_text:
        return full_text.split(response_marker, 1)[-1].strip()
    return full_text.strip()

with open("output.jsonl", "w", encoding="utf-8") as outfile:
    # Iterate over the 100 tasks (task_id 0-99)
    for i in range(100):
        # Get the formatted prompt
        input_text = dataset_test[i]["text"]

        # Tokenize and move input to GPU
        inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

        # Generate output
        output = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.15,
            repetition_penalty=1.05,
            use_cache=True,
            do_sample=True
        )

        # Decode output text
        decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

        # Extract only the response part
        response_only = extract_response(decoded_output)

        # Print for debugging
        print("task_id:", i)
        print("input:", dataset_test[i]["input"])
        print("output:", response_only)
        print("---")

        # Prepare a dictionary for JSONL
        result = {
            "task_id": i,
            "input": dataset_test[i]["input"],
            "output": response_only
        }

        # Save to JSONL
        outfile.write(json.dumps(result, ensure_ascii=False) + "\n")
```
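For reference, `extract_response` simply splits the decoded text on the output marker and keeps what follows. A toy illustration with a made-up decoded string (the Q&A content below is invented purely for the example):

```python
# Hypothetical decoded output, shaped like the prompt template above.
sample = """### 指示
以下の入力に従って適切に処理してください。

### 入力:
日本の首都はどこですか?

### 出力:
東京です。"""

print(extract_response(sample))  # -> 東京です。
```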
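As a design note, the string-splitting approach can be replaced by decoding only the newly generated tokens, which avoids depending on the marker text at all. A minimal sketch, reusing the `inputs` and `output` variables from the generation loop above:

```python
# Alternative to extract_response: slice off the prompt tokens and
# decode only what the model generated after them.
prompt_length = inputs["input_ids"].shape[1]
new_tokens = output[0][prompt_length:]
response_only = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
```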
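Finally, a quick sanity check of the generated file is worth running before use. A minimal sketch, assuming `output.jsonl` sits in the working directory and the benchmark contains exactly 100 tasks:

```python
import json

# Read output.jsonl back and verify its shape: 100 records, each with
# the task_id / input / output keys written by the generation loop.
with open("output.jsonl", "r", encoding="utf-8") as f:
    records = [json.loads(line) for line in f if line.strip()]

assert len(records) == 100, f"expected 100 records, got {len(records)}"
for record in records:
    assert {"task_id", "input", "output"} <= record.keys()
    assert isinstance(record["output"], str) and record["output"], \
        f"empty output for task_id {record['task_id']}"

print("output.jsonl looks well-formed.")
```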