llama-cpp-python

Runtime error

File size: 1,393 Bytes

06cf9c4
ed5c4cc
 
 
06cf9c4
ed5c4cc
06cf9c4
ed5c4cc
 
360ead8
3402c51
00b813c
3402c51
06cf9c4
3402c51
 
 
 
 
 
 
 
117600f
47e41bf
00b813c
 
 
 
 
 
 
 
 
47e41bf
 
 
 
 
00b813c
 
 
 
06cf9c4
ef70bbb
06cf9c4
117600f

import gradio as gr
import time
import ctypes #to run on C api directly 
import llama_cpp
from llama_cpp import Llama
from huggingface_hub import hf_hub_download #load from huggingfaces 


# Download the model file from the Hugging Face Hub, then load it with a
# 2048-token context window.
_model_path = hf_hub_download(
    repo_id="TheBloke/airoboros-l2-13b-gpt4-m2.0-GGML",
    filename="airoboros-l2-13b-gpt4-m2.0.ggmlv3.q6_K.bin",
)
llm = Llama(model_path=_model_path, n_ctx=2048)

# Module-level conversation state; `history` starts with a single greeting
# exchange stored as a [user_message, bot_reply] pair.
h = []
history = [["Hi there!", "Hello, how can I help you?"]]

def generate_text(input_text, history):
    """Generate the model's reply to ``input_text`` and record the exchange.

    Args:
        input_text: The user's new message.
        history: Earlier exchanges as ``[user_message, bot_reply]`` pairs.
            ``None`` is treated as an empty conversation.

    Returns:
        A ``("", history)`` tuple: the empty string clears the input textbox
        and ``history`` is the conversation with the new exchange appended.
    """
    if history is None:
        history = []

    # Label every earlier turn the same way as the new one so the prompt has a
    # uniform "Q: ... A: ..." shape that matches the "Q:" stop sequence.
    # A missing message (None) is rendered as an empty string, not "None".
    turns = [
        f"Q: {user_msg or ''} \n A: {bot_msg or ''}"
        for user_msg, bot_msg in history
    ]
    turns.append(f"Q: {input_text} \n A:")
    prompt = " ".join(turns)

    # echo=False: with echo enabled the returned text starts with the prompt
    # itself, so the whole conversation would be stored as the bot's reply.
    output = llm(prompt, max_tokens=1024, stop=["Q:", "\n"], echo=False)
    response = output["choices"][0]["text"].strip()
    history.append([input_text, response])

    return "", history

def bot(history, delay=0.05):
    """Stream the most recent bot reply into the chat one character at a time.

    Args:
        history: Conversation as ``[user_message, bot_reply]`` pairs; the
            reply of the last pair is the text that gets streamed.
        delay: Seconds to pause after each character (default ``0.05``).

    Yields:
        ``history`` itself (mutated in place) after each character is added
        to the last reply. If there is no exchange or no reply text to
        stream, ``history`` is yielded once unchanged.
    """
    if not history or not history[-1][1]:
        # Nothing to animate; still emit one value so the output is defined.
        yield history
        return

    # Take the finished reply, blank it out, then rebuild it incrementally so
    # each yielded state shows one more character than the previous one.
    bot_message = history[-1][1]
    history[-1][1] = ""
    for character in bot_message:
        history[-1][1] += character
        time.sleep(delay)
        yield history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    # On submit: generate_text receives the textbox and chat values and
    # writes back to both; afterwards bot re-renders the chat output.
    msg.submit(generate_text, [msg, chatbot], [msg, chatbot]).then(
        bot, chatbot, chatbot
    )
    # Reset the chat display when the clear button is pressed.
    clear.click(lambda: None, None, chatbot, queue=False)

# `bot` is a generator handler. Gradio 3.x only runs generator events when the
# queue is enabled; on releases where the queue is already on by default this
# call is harmless.
demo.queue()
demo.launch()