# article/models/vicuna_bin.py
# Provenance: Hugging Face upload by zxsipola123456 ("Upload 33 files",
# commit 591004d verified, 598 bytes). Viewer chrome converted to comments
# so this file parses as Python.
#!python
# -*- coding: utf-8 -*-
# @author: Kun
from llama_cpp import Llama, LlamaCache
from common import torch_gc
# Default generation settings consumed by callers of this module.
# NOTE(review): names are lowercase but these act as module constants;
# kept as-is since other modules presumably import them by these names.
max_token: int = 10000  # maximum number of tokens to generate per request
temperature: float = 0.75  # sampling temperature (higher = more random)
top_p: float = 0.9  # nucleus-sampling probability mass cutoff
def load_model(model_name_or_path: str = "/root/下载/ggml-vic13b-q5_1.bin"):
    """Load a GGML Vicuna model through llama-cpp-python.

    Args:
        model_name_or_path: Filesystem path to the ggml ``.bin`` model file.
            Defaults to the original hard-coded path for backward
            compatibility.

    Returns:
        Tuple ``(tokenizer, model)`` where ``model`` is the ``Llama``
        instance and ``tokenizer`` is the object returned by
        ``model.tokenizer()``.
    """
    params = {
        'model_path': str(model_name_or_path),
        'n_ctx': 2048,        # context window size in tokens
        'seed': 0,            # fixed seed for reproducible sampling
        'n_threads': 8,       # CPU threads for inference
        'n_gpu_layers': 40,   # layers offloaded to GPU
        'n_batch': 512,       # prompt-processing batch size
        'verbose': True,
    }
    model = Llama(**params)
    # Fix: Llama.set_cache expects a cache *instance*, not the LlamaCache
    # class object that the original code passed.
    model.set_cache(LlamaCache())
    tokenizer = model.tokenizer()
    return tokenizer, model