Create app.py

app.py
ADDED
import os
from dataclasses import dataclass, asdict

import gradio as gr
from ctransformers import AutoModelForCausalLM, AutoConfig

@dataclass
class GenerationConfig:
    temperature: float
    top_k: int
    top_p: float
    repetition_penalty: float
    max_new_tokens: int
    seed: int
    reset: bool  # reset the model's cached history before generating
    stream: bool  # yield tokens one by one instead of returning a full string
    threads: int  # CPU threads used for inference
    stop: list[str]  # sequences that terminate generation

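# asdict() in generate() below unpacks these fields straight into the
# keyword arguments of ctransformers' llm(...) call, so the dataclass
# doubles as a typed container for the sampling settings.
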
def format_prompt(user_prompt: str) -> str:
    """Wrap the user's request in the Alpaca-style instruction template."""
    return f"""### Instruction:
{user_prompt}

### Response:"""

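# For example, format_prompt("Print hello world") renders as:
#
#   ### Instruction:
#   Print hello world
#
#   ### Response:
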
def generate(
    llm: AutoModelForCausalLM,
    generation_config: GenerationConfig,
    user_prompt: str,
):
    """Run model inference; returns a generator of tokens when stream=True."""
    return llm(
        format_prompt(user_prompt),
        **asdict(generation_config),
    )

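# With stream=True the call yields tokens as they are produced; a minimal
# way to consume it (mirroring the CLI loop at the bottom of this file):
#
#   for token in generate(llm, generation_config, "Write a haiku"):
#       print(token, end="", flush=True)
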
config = AutoConfig.from_pretrained(
    "teknium/Replit-v2-CodeInstruct-3B", context_length=2048
)
llm = AutoModelForCausalLM.from_pretrained(
    os.path.abspath("replit-v2-codeinstruct-3b.q4_1.bin"),
    model_type="replit",
    config=config,
)

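# The q4_1 GGML weights are expected to sit next to app.py. If they are
# not bundled with the Space, one option (an assumption, not part of the
# original file; adjust repo_id/filename to wherever the quantized
# weights actually live) is to fetch them at startup:
#
#   from huggingface_hub import hf_hub_download
#   model_path = hf_hub_download(
#       repo_id="teknium/Replit-v2-CodeInstruct-3B",
#       filename="replit-v2-codeinstruct-3b.q4_1.bin",
#   )
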
generation_config = GenerationConfig(
    temperature=0.2,
    top_k=50,
    top_p=0.9,
    repetition_penalty=1.0,
    max_new_tokens=512,  # adjust as needed
    seed=42,
    reset=True,  # reset history (cache)
    stream=True,  # streaming per word/token
    threads=max(1, os.cpu_count() // 6),  # adjust for your CPU; never 0
    stop=["<|endoftext|>"],
)

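# A low temperature combined with top-k/top-p (nucleus) sampling keeps
# code completions fairly deterministic; raise temperature or top_p for
# more varied output.
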
user_prefix = "[user]: "
assistant_prefix = "[assistant]:"  # only used by the CLI loop below

title = "Replit-v2-CodeInstruct-3b-ggml"
description = "This Space runs a 4-bit quantized (GGML) build of Replit's CodeInstruct 3B on CPU."

example_1 = "Write a Python function that calculates the factorial of a number entered by the user."
example_2 = "Write a Python script that prints 'you are logged in' only if the user enters a number between 1 and 10."

examples = [example_1, example_2]

def chat(user_prompt: str):
    """Gradio handler: accumulate streamed tokens and yield the growing reply."""
    response = ""
    for token in generate(llm, generation_config, user_prompt.strip()):
        response += token
        yield response


UI = gr.Interface(
    fn=chat,  # generate() itself needs llm and generation_config, so wrap it
    inputs=gr.Textbox(label="user_prompt", placeholder="Ask your queries here...."),
    outputs=gr.Textbox(label="Assistant"),
    title=title,
    description=description,
    examples=examples,
)

UI.queue().launch()  # queue() is required for generator (streaming) outputs

# CLI alternative to the Gradio UI:
#
# while True:
#     user_prompt = input(user_prefix)
#     generator = generate(llm, generation_config, user_prompt.strip())
#     print(assistant_prefix, end=" ", flush=True)
#     for word in generator:
#         print(word, end="", flush=True)
#     print("")