DevashishBhake committed on
Commit
df4e7d2
·
1 Parent(s): cf03eb1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ctransformers import AutoModelForCausalLM
3
+
4
+ USER_NAME = "User"
5
+ BOT_NAME = "Falcon"
6
+ DEFAULT_INSTRUCTIONS = f"""The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins."""
7
+ STOP_STR = f"\n{USER_NAME}:"
8
+ STOP_SUSPECT_LIST = [":", "\n", "User"]
9
+ temperature = 0.8
10
+ top_p = 0.9
11
+ llm = AutoModelForCausalLM.from_pretrained("TheBloke/falcon-40b-instruct-GGML", model_file="falcon40b-instruct.ggmlv3.q2_K.bin",
12
+ model_type="falcon", threads=8)
13
+ def format_chat_prompt(message: str, instructions: str) -> str:
14
+ instructions = instructions.strip(" ").strip("\n")
15
+ prompt = instructions
16
+ prompt = f"{prompt}\n{USER_NAME}: {message}\n{BOT_NAME}:"
17
+ return prompt
18
+
19
+ def run_chat(message: str):
20
+ prompt = format_chat_prompt(message, DEFAULT_INSTRUCTIONS)
21
+ stream = llm(
22
+ prompt,
23
+ max_new_tokens=1024,
24
+ stop=[STOP_STR, "<|endoftext|>", USER_NAME],
25
+ temperature=temperature,
26
+ top_p=top_p,
27
+ stream=True
28
+ )
29
+ acc_text = ""
30
+ for idx, response in enumerate(stream):
31
+ text_token = response
32
+
33
+ if text_token in STOP_SUSPECT_LIST:
34
+ acc_text += text_token
35
+ continue
36
+
37
+ if idx == 0 and text_token.startswith(" "):
38
+ text_token = text_token[1:]
39
+
40
+ acc_text += text_token
41
+ return acc_text
42
+
43
+ demo = gr.Interface(
44
+ fn=run_chat,
45
+ inputs=gr.inputs.Textbox(label="Message"),
46
+ outputs=gr.outputs.Textbox(label="Generated Text"),
47
+ )
48
+
49
+ demo.launch()