from ctransformers import AutoModelForCausalLM
import gradio as gr

# Footer credit shown under the chat UI.
greety = """
A special thanks to the CCL AI team members who made a significant contribution to this project.
"""

# Load the quantized Llama model from a local GGUF file.
# NOTE(review): model_type='llama' implies a Llama-family checkpoint; the
# prompt template below assumes the Llama-2 chat format — confirm the
# merged model was fine-tuned with that template.
llm = AutoModelForCausalLM.from_pretrained(
    "pt_merge_model_v3.Q4_K_M.gguf",
    model_type="llama",
    max_new_tokens=512,
    threads=3,
)


def stream(prompt, UL):
    """Generate a single completion for *prompt* via the local LLM.

    Parameters
    ----------
    prompt : str
        The user's chat message.
    UL : list
        Conversation history supplied by ``gr.ChatInterface`` as the
        second positional argument; currently unused (each turn is
        answered independently, without context).

    Returns
    -------
    str
        The model's full response. Despite the name, this returns the
        complete text in one shot rather than yielding tokens.
    """
    system_prompt = 'You are a helpful AI assistant'
    # Llama-2 chat template: [INST] <<SYS>>\n{system}\n<</SYS>>\n\n{user} [/INST]
    # The previous version emitted literal '+' characters and empty '<>'
    # markers, so the model never saw a well-formed system prompt.
    prompt = (
        f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
        f"{prompt.strip()} [/INST]"
    )
    return llm(prompt)


# Page-level styling for the Blocks layout.
css = """
h1 {
  text-align: center;
}

#duplicate-button {
  margin: auto;
  color: white;
  background: #1565c0;
  border-radius: 100vh;
}

.contain {
  max-width: 900px;
  margin: auto;
  padding-top: 1.5rem;
}
"""

chat_interface = gr.ChatInterface(
    fn=stream,
    stop_btn=None,
    examples=[
        ["explain Large language model"],
        ["what is quantum computing"],
        ["Explain what is atomic reaction"],
    ],
)

with gr.Blocks(css=css) as demo:
    # Header banners. NOTE(review): the original markup was mangled in the
    # paste; the heading tags below are a reconstruction around the visible
    # text — confirm against the intended layout.
    gr.HTML("<center><h1>RoyalGPT Free LLM Deployment Space</h1></center>")
    gr.HTML("<center><h1>RoyalGPT💬</h1></center>")
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_id="duplicate-button",
    )
    chat_interface.render()
    gr.Markdown(greety)

if __name__ == "__main__":
    # Bound the request queue and expose a public share link.
    demo.queue(max_size=10).launch(share=True)