Spaces:

rcarioniporras
/

finetuned-llm

Sleeping

File size: 1,385 Bytes

d19f14b
232d877
389bcc2
6d635d0
d19f14b
61b1f0f
565913e
d19f14b
2a84c1f
6d635d0
232d877
6d635d0
232d877
 
 
 
 
2a84c1f
232d877
2a84c1f
232d877
2a84c1f
232d877
2a84c1f
232d877
 
 
2a84c1f
 
6d635d0
 
 
 
 
 
 
1f742b2
6d635d0
ae5112e
d19f14b

from llama_cpp import Llama
import gradio as gr


# Load the GGUF model file "unsloth.Q4_K_M.gguf" from the
# "rcarioniporras/model_modelcentric_llama_gguf" repository. This runs at
# import time, so `llm` exists before any chat request is handled.
llm = Llama.from_pretrained(
    filename="unsloth.Q4_K_M.gguf",
    repo_id="rcarioniporras/model_modelcentric_llama_gguf",
)


# System instruction placed at the start of every conversation sent to the model.
_SYSTEM_PROMPT = (
    "You are a helpful assistant who answers questions in a concise but "
    "thorough way. Prioritize clarity and usefulness in all interactions."
)


def predict(message, history):
    """Stream the model's reply to ``message`` given the earlier conversation.

    Args:
        message: The latest user input.
        history: Earlier turns of the conversation, or ``None``. Each entry is
            either a ``(user_message, bot_message)`` pair or a
            ``{"role": ..., "content": ...}`` dict. Empty parts are skipped;
            dict entries are forwarded only for the "user" and "assistant"
            roles.

    Yields:
        The reply text accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": _SYSTEM_PROMPT}]

    for turn in history or ():
        if isinstance(turn, dict):
            # Messages-style history. Unpacking such a dict as a pair would
            # silently yield its *keys* ("role", "content") instead of the
            # actual text, so it is handled explicitly.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and content:
                messages.append({"role": role, "content": content})
            continue

        # Pair-style history: (user_message, bot_message).
        user_message, bot_message = turn
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if bot_message:
            messages.append({"role": "assistant", "content": bot_message})

    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in llm.create_chat_completion(messages=messages, stream=True):
        part = chunk["choices"][0]["delta"].get("content")
        if part:
            response += part
        # Yield after every chunk, including chunks whose delta carries no
        # text, so the caller always receives the current accumulated reply.
        yield response

# Sample prompts, each wrapped as a {"text": ...} dict.
# NOTE(review): nothing in the visible file reads this list; presumably it was
# meant for the chat UI's example prompts. Confirm before wiring it in or
# removing it.
conversation_starters = [
    {"text": prompt}
    for prompt in (
        "What is object-oriented programming (OOP), and what are its four main principles?",
        "Compare the stack and queue data structures.",
        "Simplify the expression 3(x+4)−2(2x−1).",
        "What are some fun facts about space?",
    )
]

# Chat UI that calls the `predict` generator for each user message and uses
# the "Shivi/calm_seafoam" theme.
demo = gr.ChatInterface(
    theme="Shivi/calm_seafoam",
    fn=predict,
)

if __name__ == "__main__":
    # Launch the interface only when this file is executed as a script.
    demo.launch()