import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
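
# Download the quantized GGUF checkpoint from the Hugging Face Hub
# (cached locally after the first run).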
model_name = "large-traversaal/Alif-1.0-8B-Instruct"
model_file = "model-Q8_0.gguf"
model_path_file = hf_hub_download(repo_id=model_name, filename=model_file)
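
# Load the model with llama-cpp-python.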
llama = Llama(
    model_path=model_path_file,
    n_gpu_layers=40,  # transformer layers offloaded to the GPU; 0 for CPU-only
    n_threads=8,      # CPU threads for the layers that stay on the CPU
    n_batch=512,      # prompt tokens processed per batch
    n_ctx=4096,       # context window size in tokens
    verbose=True,
)
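
# Prompt template wrapped around each user message.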
chat_prompt = """You are an Urdu chatbot. Write an appropriate response for the given instruction: {inp} Response:"""

def chat_with_ai(prompt):
    query = chat_prompt.format(inp=prompt)

    # stream=True makes the call return a generator of partial completions;
    # generation stops at "Q:" or the first newline.
    response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)

    # Accumulate streamed tokens and yield the running text so the UI
    # updates incrementally.
    text = ""
    for chunk in response:
        content = chunk["choices"][0]["text"]
        if content:
            text += content
            yield text
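
# Because chat_with_ai is a generator, gr.Interface streams each yielded
# value to the output textbox.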
demo = gr.Interface(
    fn=chat_with_ai,
    inputs="text",
    outputs="text",
    title="Streaming Alif-1.0-8B-Instruct Chatbot",
    description="Enter a prompt and get a streamed response.",
)
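
# share=True exposes the app through a temporary public Gradio link.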
demo.launch(share=True)