replit-3b-ggml_models

Running

App Files Files Community

replit-3b-ggml_models / app.py

ML610

Update app.py

de651f6 almost 2 years ago

raw

history blame contribute delete

2.25 kB

	import gradio as gr

	import os
	from dataclasses import dataclass, asdict
	from ctransformers import AutoModelForCausalLM, AutoConfig


	@dataclass
	class GenerationConfig:
	    """Bundle of options handed to the model on every generation call.

	    ``generate`` expands an instance with ``asdict`` and passes each field
	    to the model as a keyword argument, so the field names are the keyword
	    names the model call receives. Field order is significant: it is the
	    positional order of the generated constructor.
	    """

	    # Presumably sampling controls as defined by ctransformers -- confirm
	    # exact semantics against its documentation.
	    temperature: float
	    top_k: int
	    top_p: float
	    repetition_penalty: float

	    max_new_tokens: int  # upper bound on generated tokens; adjust as needed
	    seed: int
	    reset: bool  # reset history (cache)
	    stream: bool  # streaming per word/token
	    threads: int  # adjust for your CPU
	    stop: list[str]  # presumably sequences that end generation -- confirm


	def format_prompt(user_prompt: str):
	    """Wrap *user_prompt* in the instruction/response prompt template.

	    The result is an ``### Instruction:`` header, the user's text on the
	    following line, one blank line, and a closing ``### Response:`` header
	    with no trailing newline.
	    """
	    template_lines = (
	        "### Instruction:",
	        f"{user_prompt}",
	        "",
	        "### Response:",
	    )
	    return "\n".join(template_lines)


	def generate(
	    llm: AutoModelForCausalLM,
	    generation_config: GenerationConfig,
	    user_prompt: str,
	):
	    """Run model inference on *user_prompt* and return the model's result.

	    The prompt is first wrapped by ``format_prompt``; every field of
	    *generation_config* is then passed to ``llm`` as a keyword argument.
	    When streaming is enabled the result is a generator.
	    """
	    prompt = format_prompt(user_prompt)
	    call_kwargs = asdict(generation_config)
	    return llm(prompt, **call_kwargs)

	# Build the model configuration from the "teknium/Replit-v2-CodeInstruct-3B"
	# identifier, overriding the context length to 2048.
	config = AutoConfig.from_pretrained(
	"teknium/Replit-v2-CodeInstruct-3B", context_length=2048
	)
	# Load the model weights from a local file. os.path.abspath resolves the
	# file name against the current working directory, so the .bin file is
	# expected to sit in the directory the app is started from.
	llm = AutoModelForCausalLM.from_pretrained(
	os.path.abspath("replit-code-instruct-glaive.ggmlv1.q4_1.bin"),
	model_type="replit",
	config=config,
	)

	# Default generation settings shared by every request.
	generation_config = GenerationConfig(
	    temperature=0.2,
	    top_k=50,
	    top_p=0.9,
	    repetition_penalty=1.0,
	    max_new_tokens=512,  # adjust as needed
	    seed=42,
	    reset=True,  # reset history (cache)
	    stream=True,  # streaming per word/token
	    # Use a sixth of the available cores, but never fewer than one:
	    # os.cpu_count() may return None, and machines with fewer than six
	    # cores would otherwise round down to zero threads.
	    threads=max(1, (os.cpu_count() or 1) // 6),  # adjust for your CPU
	    # Plain end-of-text marker. A backslash-escaped "\|" is an invalid
	    # escape sequence in a Python string and keeps the backslashes, so it
	    # would not equal this marker.
	    stop=["<|endoftext|>"],
	)

	# Speaker labels for a chat-style transcript.
	user_prefix = "[user]: "
	# Plain literal: the string has no placeholders, so no f-prefix is needed.
	assistant_prefix = "[assistant]:"

	# Heading and blurb passed to the Gradio interface.
	title = "Replit-v2-CodeInstruct-3b-ggml"
	description = "This space is an attempt to run the GGML 4 bit quantized version of 'Replit's CodeInstruct 3B' on a CPU"

	# Sample prompts offered to the user in the interface.
	example_1 = "Write a python script for a function which calculates the factorial of the number inputted by user."
	example_2 = "Write a python script which prints 'you are logged in' only if the user inputs a number between 1-10"

	examples = [example_1, example_2]

	def generate_code(user_input):
	    """Return the model's complete reply to *user_input* as one string.

	    Calls ``generate`` with the module-level ``llm`` and
	    ``generation_config`` and concatenates every piece it yields, in order.
	    """
	    # str.join consumes the streamed pieces in a single pass instead of
	    # building a new intermediate string for each piece.
	    return "".join(generate(llm, generation_config, user_input))

	# Assemble the Gradio interface: a single text box feeds generate_code and
	# its return value is shown in a second text box labelled "Assistant".
	# The title, description and example prompts come from the module-level
	# constants defined above.
	UI = gr.Interface(
	fn=generate_code,
	inputs=gr.Textbox(label="user_prompt", placeholder="Ask your queries here...."),
	outputs=gr.Textbox(label="Assistant"),
	title=title,
	description=description,
	examples=examples
	)

	# Start the app as soon as this module is executed.
	UI.launch()