import os
from dataclasses import asdict, dataclass

import gradio as gr
from ctransformers import AutoConfig, AutoModelForCausalLM


@dataclass
class GenerationConfig:
    """Generation options forwarded to the model call.

    Every field is expanded with ``asdict`` and passed as a keyword argument
    to ``llm(...)`` inside ``generate``, so the field names must match the
    keyword names the model call accepts.
    """

    temperature: float
    top_k: int
    top_p: float
    repetition_penalty: float
    max_new_tokens: int
    seed: int
    reset: bool
    stream: bool
    threads: int
    stop: list[str]


def format_prompt(user_prompt: str) -> str:
    """Wrap *user_prompt* in the instruction/response prompt template."""
    # NOTE(review): the template is reproduced exactly as it appears in this
    # file (single spaces between sections). Instruction-tuned models are
    # often trained with line breaks between these sections -- confirm
    # against the model card before changing the literal.
    return f"""### Instruction: {user_prompt} ### Response:"""


def generate(
    llm: AutoModelForCausalLM,
    generation_config: GenerationConfig,
    user_prompt: str,
) -> str:
    """Run model inference for *user_prompt* and return the generated text.

    Args:
        llm: Loaded model; called with the formatted prompt.
        generation_config: Options expanded into keyword arguments of the call.
        user_prompt: Raw user request, wrapped by ``format_prompt``.

    Returns:
        The complete generated text. When streaming is enabled the streamed
        pieces are consumed here and concatenated, so callers always get a
        single string.
    """
    output = llm(format_prompt(user_prompt), **asdict(generation_config))
    # The model call yields text pieces when ``stream`` is true and a plain
    # string otherwise; neither carries a ``generated_text`` attribute, so
    # normalise both shapes to one string.
    if isinstance(output, str):
        return output
    return "".join(output)


config = AutoConfig.from_pretrained(
    "teknium/Replit-v2-CodeInstruct-3B", context_length=2048
)
llm = AutoModelForCausalLM.from_pretrained(
    os.path.abspath("replit-v2-codeinstruct-3b.q4_1.bin"),
    model_type="replit",
    config=config,
)

generation_config = GenerationConfig(
    temperature=0.2,
    top_k=50,
    top_p=0.9,
    repetition_penalty=1.0,
    max_new_tokens=512,  # adjust as needed
    seed=42,
    reset=True,  # reset history (cache)
    stream=True,  # streaming per word/token
    # adjust for your CPU; os.cpu_count() may return None and the division
    # rounds down to 0 on machines with fewer than 6 cores, so always keep
    # at least one thread.
    threads=max(1, (os.cpu_count() or 1) // 6),
    stop=["<|endoftext|>"],
)

user_prefix = "[user]: "
assistant_prefix = "[assistant]:"

title = "Replit-v2-CodeInstruct-3b-ggml"
description = "This space is an attempt to run the 4 bit quantized version of 'Replit's CodeInstruct 3B' on a CPU"
example_1 = "Write a python script for a function which calculates the factorial of the number inputted by user."
example_2 = "Write a python script which prints 'you are logged in' only if the user inputs a number between 1-10"
examples = [example_1, example_2]


def _answer(user_prompt: str) -> str:
    """Gradio callback: run the module-level model on *user_prompt*.

    The interface has a single input component, so Gradio calls its function
    with exactly one argument. ``generate`` needs the model and the generation
    settings as well, so this adapter supplies the module-level ``llm`` and
    ``generation_config``.

    Returns:
        The generated text as one string, as expected by the output Textbox.
    """
    output = generate(llm, generation_config, user_prompt)
    # Accept either a finished string or an iterable of text pieces.
    if isinstance(output, str):
        return output
    return "".join(output)


UI = gr.Interface(
    fn=_answer,
    inputs=gr.Textbox(label="user_prompt", placeholder="Ask your queries here...."),
    outputs=gr.Textbox(label="Assistant"),
    title=title,
    description=description,
    examples=examples,
)
UI.launch()

# Terminal chat loop, kept disabled for reference:
# while True:
#     user_prompt = input(user_prefix)
#     generator = generate(llm, generation_config, user_prompt.strip())
#     print(assistant_prefix, end=" ", flush=True)
#     for word in generator:
#         print(word, end="", flush=True)
#     print("")