import gradio as gr
import os
import requests
from llama_cpp import Llama

# Hugging Face repo hosting the GGUF build of the model
llm_name = "MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF"
llm_path = os.path.basename(llm_name)

# Quantized weights file to fetch from the repo (Q4_K_M: a 4-bit K-quant, medium quality)
gguf_model = "Q4_K_M.gguf"

def download_llms(llm_name):
    """Download the GGUF model file from the Hugging Face Hub if not already cached."""
    print("Downloading " + llm_name)
    download_url = f"https://huggingface.co/MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF/resolve/main/{gguf_model}"

    if not os.path.exists("model"):
        os.makedirs("model")

    llm_filename = os.path.basename(download_url)
    llm_temp_file_path = os.path.join("model", llm_filename)

    if os.path.exists(llm_temp_file_path):
        print("Model already available")
    else:
        response = requests.get(download_url, stream=True)
        if response.status_code == 200:
            # Stream the multi-gigabyte file to disk in chunks instead of buffering it in memory
            with open(llm_temp_file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            print("Download completed")
        else:
            print(f"Model download unsuccessful: status code {response.status_code}")
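
# An alternative sketch using huggingface_hub (an assumption: the package is
# installed; hf_hub_download handles caching, retries, and resumed downloads):
#
#   from huggingface_hub import hf_hub_download
#   hf_hub_download(repo_id=llm_name, filename=gguf_model, local_dir="model")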

def initialize_llm(llm_model):
    """Download the model if needed and load it with llama-cpp-python."""
    model_path = ""
    if llm_model == llm_name:
        model_path = f"model/{gguf_model}"
        download_llms(llm_model)
    llm = Llama(
        model_path=model_path,
        n_ctx=1024,      # context window size in tokens
        verbose=False
    )
    return llm

llm = initialize_llm(llm_name)
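
# With a GPU build of llama-cpp-python, offloading layers speeds up inference
# considerably. A hedged sketch (the values are assumptions, tune per machine):
#
#   llm = Llama(model_path=f"model/{gguf_model}", n_ctx=1024,
#               n_gpu_layers=-1, verbose=False)   # -1 offloads all layers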

def format_prompt(input_text, history):
    """Build a ChatML-style prompt from the chat history and the new user message."""
    system_prompt = """You are a helpful AI assistant. You are truthful in your response for real-world matters
    but you are also creative for imaginative/fictional tasks."""

    # ChatML layout: the system message appears once at the top, followed by
    # alternating user/assistant turns, each closed with <|im_end|>.
    prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
    for previous_prompt, response in history:
        prompt += f"<|im_start|>user\n{previous_prompt}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{input_text}<|im_end|>\n<|im_start|>assistant\n"
    return prompt
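
# For illustration, format_prompt("Hi", []) produces a prompt of roughly this
# shape (newlines shown literally):
#
#   <|im_start|>system
#   You are a helpful AI assistant. ...<|im_end|>
#   <|im_start|>user
#   Hi<|im_end|>
#   <|im_start|>assistant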

def generate(prompt, history, max_new_tokens=512):
    """Stream a completion from the model, yielding the accumulated text for the UI."""
    if not history:
        history = []

    kwargs = dict(
        max_tokens=max_new_tokens,
        stop=["<|im_end|>"]   # stop generation at the ChatML end-of-turn token
    )

    formatted_prompt = format_prompt(prompt, history)

    # stream=True makes llama-cpp return an iterator of partial completions
    response = llm(formatted_prompt, **kwargs, stream=True)
    output = ""
    for chunk in response:
        output += chunk['choices'][0]['text']
        yield output
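
# Illustrative smoke test (commented out so the Gradio app remains the only
# entry point); the generator yields the accumulated reply after each chunk:
#
#   for partial in generate("What is a GGUF file?", []):
#       print(partial)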

chatbot = gr.Chatbot(height=500)
with gr.Blocks(theme=gr.themes.Default(primary_hue="sky")) as demo:
    gr.HTML("<center><h1>Fine-tuned Meta-Llama-3-8B Chatbot</h1></center>")
    gr.Markdown("<b>This AI agent uses the MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF model for text generation.</b>")
    gr.ChatInterface(
        generate,
        chatbot=chatbot,
        retry_btn=None,
        undo_btn=None,
        clear_btn="Clear",
        examples=[
            ["What is a large language model?"],
            ["What is the meaning of life?"],
            ["Write a short story about a fictional planet named 'Orca'."]
        ]
    )

demo.queue().launch()