Spaces:
Sleeping
Sleeping
File size: 2,517 Bytes
89c908e 2350bde 89c908e 67621e9 89c908e a5a87a4 2350bde a5a87a4 89c908e a5a87a4 89c908e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import gradio as gr
import os
from dataclasses import dataclass, asdict
from ctransformers import AutoModelForCausalLM, AutoConfig
@dataclass
class GenerationConfig:
    """Bundle of generation options for a single model call.

    The instance is converted to a dict and passed to the model as keyword
    arguments, so every field name must be an option the model call accepts.
    Field order is part of the interface (positional construction).
    """

    # Sampling controls.
    temperature: float
    top_k: int
    top_p: float
    repetition_penalty: float

    # Upper bound on the number of tokens produced per call.
    max_new_tokens: int

    # Seed for the sampler.
    seed: int

    # Whether to reset the model's history (cache) before generating.
    reset: bool

    # Whether output is streamed piece by piece instead of returned at once.
    stream: bool

    # Number of CPU threads handed to the model.
    threads: int

    # Sequences at which generation stops.
    stop: list[str]
def format_prompt(user_prompt: str):
    """Wrap *user_prompt* in the instruction/response prompt template.

    The result is an ``### Instruction:`` header line, the prompt text on the
    following line, and a closing ``### Response:`` header with no trailing
    newline.
    """
    template = "### Instruction:\n{}\n### Response:"
    return template.format(user_prompt)
def generate(
    llm: AutoModelForCausalLM,
    generation_config: GenerationConfig,
    user_prompt: str,
):
    """Run model inference on *user_prompt*.

    The prompt is wrapped with ``format_prompt`` and the fields of
    *generation_config* are passed to the model as keyword arguments.
    Returns whatever the model call returns; that is a generator when
    streaming is enabled.
    """
    prompt = format_prompt(user_prompt)
    options = asdict(generation_config)
    return llm(prompt, **options)
# Model configuration, with the context length set to 2048.
config = AutoConfig.from_pretrained(
    "teknium/Replit-v2-CodeInstruct-3B",
    context_length=2048,
)

# The weights file is resolved against the current working directory.
_model_path = os.path.abspath("replit-v2-codeinstruct-3b.q4_1.bin")
llm = AutoModelForCausalLM.from_pretrained(
    _model_path,
    config=config,
    model_type="replit",
)
# Default generation settings shared by every request.
generation_config = GenerationConfig(
    temperature=0.2,
    top_k=50,
    top_p=0.9,
    repetition_penalty=1.0,
    max_new_tokens=512,  # adjust as needed
    seed=42,
    reset=True,  # reset history (cache)
    stream=True,  # streaming per word/token
    # Use about one sixth of the available cores, but never fewer than one:
    # os.cpu_count() may return None, and the plain division yields 0 on
    # machines with fewer than six cores. Adjust the divisor for your CPU.
    threads=max(1, (os.cpu_count() or 1) // 6),
    stop=["<|endoftext|>"],
)
# Prefixes for a plain console conversation.
user_prefix = "[user]: "
assistant_prefix = "[assistant]:"

# Texts shown on the web page.
title = "Replit-v2-CodeInstruct-3b-ggml"
description = (
    "This space is an attempt to run the 4 bit quantized version of "
    "'Replit's CodeInstruct 3B' on a CPU"
)

# Sample prompts offered to the user.
example_1 = (
    "Write a python script for a function which calculates the factorial "
    "of the number inputted by user."
)
example_2 = (
    "Write a python script which prints 'you are logged in' only if the "
    "user inputs a number between 1-10"
)
examples = [example_1, example_2]
def generate_code(user_input: str) -> str:
    """Run the model on *user_input* and return the complete generated text.

    This is the function handed to the Gradio interface. It uses the
    module-level ``llm`` and ``generation_config``.
    """
    response = generate(llm, generation_config, user_input)
    code = ""
    # Join the pieces yielded by the model into one string.
    for word in response:
        code = code + word
        # Prints the text accumulated so far, once per piece.
        # NOTE(review): presumably a progress/debug trace; confirm that it is
        # meant to run on every iteration rather than once after the loop.
        print(code)
    return code
# Build the input and output widgets, wire them to generate_code, then
# start the web app.
_prompt_box = gr.Textbox(label="user_prompt", placeholder="Ask your queries here....")
_answer_box = gr.Textbox(label="Assistant")

UI = gr.Interface(
    fn=generate_code,
    inputs=_prompt_box,
    outputs=_answer_box,
    title=title,
    description=description,
    examples=examples,
)
UI.launch()
# Alternative console chat loop, kept for reference (disabled):
# while True:
#     user_prompt = input(user_prefix)
#     generator = generate(llm, generation_config, user_prompt.strip())
#     print(assistant_prefix, end=" ", flush=True)
#     for word in generator:
#         print(word, end="", flush=True)
#     print("")