import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Load model from Hugging Face Hub
MODEL_ID = "large-traversaal/Alif-1.0-8B-Instruct"
MODEL_FILE = "model-Q8_0.gguf"
model_path_file = hf_hub_download(MODEL_ID, filename=MODEL_FILE)
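# hf_hub_download caches the weights locally (by default under
# ~/.cache/huggingface/hub) and returns the path, so restarts skip the download.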
# Initialize Llama model
llama = Llama(
    model_path=model_path_file,
    n_gpu_layers=40,  # Number of layers offloaded to the GPU; adjust based on VRAM
    n_threads=8,      # Match the number of physical CPU cores
    n_batch=512,      # Prompt-processing batch size; larger uses more VRAM
    n_ctx=4096,       # Context window size
    verbose=True,     # Enable debug logging
)
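# Optional startup sanity check (a sketch, not part of the original app): run one
# non-streaming completion to confirm the model loads and generates.
# print(llama("Q: 2 + 2 = ?\nA:", max_tokens=8, stop=["\n"])["choices"][0]["text"])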
CHAT_TEMPLATE = "Alif Chat"
CONTEXT_LENGTH = 4096
COLOR = "blue"
EMOJI = "💬"
DESCRIPTION = "Urdu AI Chatbot powered by Llama.cpp"
# Streaming response generator wired into gr.ChatInterface below
def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
    # Use the system prompt from the UI when given, otherwise the default instruction
    instruction = system_prompt.strip() or "You are an Urdu Chatbot. Write an appropriate response for the given instruction:"
    chat_prompt = f"{instruction} {message} Response:"
    response = llama(
        chat_prompt, max_tokens=max_new_tokens, temperature=temperature,
        top_k=top_k, top_p=top_p, repeat_penalty=repetition_penalty,
        stop=["Q:", "\n"], echo=False, stream=True,  # note: "\n" ends output at the first newline
    )
    text = ""
    for chunk in response:
        content = chunk["choices"][0]["text"]
        if content:
            text += content
            yield text  # yield the accumulated text so Gradio renders the stream
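# `history` is accepted but unused above; a minimal sketch of folding prior
# turns into the prompt (an assumption — `build_prompt` is a hypothetical
# helper and is not wired into the interface):
def build_prompt(message, history, system_prompt):
    turns = "".join(f"Q: {user} Response: {bot}\n" for user, bot in history)
    instruction = system_prompt or "You are an Urdu Chatbot. Write an appropriate response for the given instruction:"
    return f"{instruction}\n{turns}Q: {message} Response:"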
# Create Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Urdu Chatbot", likeable=True, render=False)
    chat = gr.ChatInterface(
        generate_response,
        chatbot=chatbot,
        title=f"{EMOJI} Alif-1.0 Chatbot",
        description=DESCRIPTION,
        examples=[
            ["شہر کراچی کے بارے میں بتاؤ"],  # "Tell me about the city of Karachi"
            ["قابل تجدید توانائی کیا ہے؟"],  # "What is renewable energy?"
            ["پاکستان کی تاریخ کے بارے میں بتائیں۔"],  # "Tell me about Pakistan's history."
        ],
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Textbox("", label="System prompt", render=False),
            gr.Slider(0, 1, 0.6, label="Temperature", render=False),
            gr.Slider(128, CONTEXT_LENGTH, 1024, label="Max new tokens", render=False),
            gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
            gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
            gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
        ],
        theme=gr.themes.Soft(primary_hue=COLOR),
    )

demo.queue(max_size=20).launch(share=True)
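# launch(share=True) also creates a temporary public gradio.live URL; drop the
# flag to keep the app local-only. queue(max_size=20) bounds how many requests
# may wait in the queue at once.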
# --- Earlier prototype kept below for reference (commented out) ---
# import llama_cpp
# from llama_cpp import Llama
# # import llama_cpp.llama_tokenizer
# import gradio as gr
# from huggingface_hub import hf_hub_download
# model_name = "large-traversaal/Alif-1.0-8B-Instruct"
# model_file = "model-Q8_0.gguf"
# model_path_file = hf_hub_download(model_name, filename=model_file)
# llama = Llama(
# model_path=model_path_file,
# n_gpu_layers=40, # Adjust based on VRAM
# n_threads=8, # Match CPU cores
# n_batch=512, # Optimize for better VRAM usage
# n_ctx=4096, # Context window size
# verbose=True # Enable debug logging
# )
# chat_prompt = """You are an Urdu Chatbot. Write an appropriate response for the given instruction: {inp} Response:"""
# # Function to generate text with streaming output
# def chat_with_ai(prompt):
# query = chat_prompt.format(inp=prompt)
# #response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True) # Enable streaming
# response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True) # Enable streaming
# text = ""
# for chunk in response:
# content = chunk["choices"][0]["text"]
# if content:
# text += content
# yield text
# # Gradio UI setup
# demo = gr.Interface(
# fn=chat_with_ai, # Streaming function
# inputs="text", # User input
# outputs="text", # Model response
# title="Streaming Alif-1.0-8B-Instruct Chatbot 🚀",
# description="Enter a prompt and get a streamed response."
# )
# # Launch the Gradio app
# demo.launch(share=True)