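# test.py -- uploaded by ganchengguang (commit d6f3d4a, "Upload test.py", 1.74 kB).
# The lines above the imports were file-viewer page residue (avatar caption,
# "raw" / "history" / "blame" links); they are kept here as a comment so the
# file remains valid Python.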
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
import torch
def generate_response(model, tokenizer, instruction, input_text, temperature, top_p, top_k, repeat_penalty, max_new_tokens=50):
    """Generate a completion for an instruction/input pair.

    The request is rendered into a "### Instruction / ### Input /
    ### Response" prompt, encoded with ``tokenizer``, passed to
    ``model.generate`` and the newly generated part is decoded to text.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``encode(text, return_tensors=...)`` and
            ``decode(ids, skip_special_tokens=...)``.
        instruction: Text placed under the ``### Instruction:`` header.
        input_text: Text placed under the ``### Input:`` header.
        temperature: Forwarded to ``generate`` as ``temperature``.
        top_p: Forwarded to ``generate`` as ``top_p``.
        top_k: Forwarded to ``generate`` as ``top_k``.
        repeat_penalty: Forwarded to ``generate`` as ``repetition_penalty``.
        max_new_tokens: Upper bound on the number of generated tokens
            (defaults to 50, the value previously hard-coded here).

    Returns:
        The decoded text of the tokens that follow the prompt in the first
        returned sequence.
    """
    prompt = (
        f"### Instruction:\n{instruction}\n"
        f"### Input:\n{input_text}\n"
        "### Response:"
    )
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    prompt_length = len(input_ids[0])

    gen_parameters = {
        # temperature / top_p / top_k only take effect when sampling is
        # enabled; without this flag generation is greedy and they are ignored.
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'repetition_penalty': repeat_penalty,
        # Only one length limit is passed: supplying max_length together with
        # max_new_tokens is contradictory, so max_length was dropped.
        'max_new_tokens': max_new_tokens,
    }
    output = model.generate(input_ids, **gen_parameters)

    # Drop the prompt at token level rather than slicing the decoded string by
    # len(prompt): decoding is not guaranteed to reproduce the prompt text
    # character-for-character, which would shift the cut point.
    # NOTE(review): assumes generate() returns the prompt tokens followed by
    # the new tokens, as causal-LM generation does -- confirm for other models.
    generated_ids = output[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)
def main():
    """Load the model and tokenizer, run one example query and print the reply."""
    # The identifier must not carry stray whitespace: a trailing space makes
    # the name match neither a local directory nor a valid repository id.
    MODEL_NAME = 'Yoko-7B-Japanese-v1'  # Replace with your model's file path or name

    # Load pre-trained model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)

    # Japanese example prompt; an English instruction would be
    # 'Please answer following question.'
    instruction = '次の問題を回答してください。'
    input_text = '東京は何国の都市ですか?'

    # Example generation settings; adjust as needed.
    temperature = 0.6
    top_p = 0.7
    top_k = 40
    repeat_penalty = 1.1

    response = generate_response(
        model,
        tokenizer,
        instruction,
        input_text,
        temperature,
        top_p,
        top_k,
        repeat_penalty,
    )
    print('response' + response)
# Run the demo only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()