# Think Paraguayo
"""Gradio chat demo for gua-a, a Mistral-7B fine-tune about Guarani culture.

On import this module makes sure the quantised GGUF model is on disk, loads it
with llama.cpp, opens a prebuilt ColBERT index, and defines the streaming chat
callback used by the Gradio UI.
"""
import os
import random
import subprocess
import time

import gradio as gr
from llama_cpp import Llama
from llama_index.core import Document, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from ragatouille import RAGPretrainedModel

MODEL_URL = (
    "https://huggingface.co/thinkPy/gua-a_v0.1-ft_mistral-7b_GGUF/"
    "resolve/main/gua-a_v0.1-ft_mistral-7b_q4_K_M.gguf"
)
MODEL_PATH = "model.gguf"


def _ensure_model(url=MODEL_URL, path=MODEL_PATH):
    """Download the GGUF model to *path* unless it is already there.

    The file is fetched under a temporary name and renamed only after wget
    exits successfully, so an interrupted transfer is never picked up as a
    valid model on the next start.

    Raises:
        subprocess.CalledProcessError: if wget exits with a non-zero status.
        FileNotFoundError: if the wget executable is not installed.
    """
    if os.path.isfile(path) and os.path.getsize(path) > 0:
        return
    partial_path = path + ".part"
    # Argument list instead of a shell string: no shell parsing of the URL,
    # and check=True surfaces a failed download here rather than as a
    # confusing model-loading error further down.
    subprocess.run(["wget", url, "-O", partial_path], check=True)
    os.replace(partial_path, path)


_ensure_model()

# NOTE(review): this single value is used both as the llama.cpp context window
# (n_ctx) and as the generation budget (max_tokens), so the prompt template,
# the retrieved context and the user message must all fit in it as well --
# confirm 256 is large enough for the indexed documents.
max_seq_length = 256

# Positional slots, in order: retrieved context, user question, answer prefix.
prompt = """Eres gua-a un modelo de lenguaje entrenado para responder preguntas sobre la cultura guaraní, debes responder de forma clara, amable, concisa y solamente en el lenguaje español, si encuentras las respuesta en este prompt puedes copiarla.

Contexto
--------------------------
{}
--------------------------

### Pregunta:
{}

### Respuesta:
{}"""

llm = Llama(model_path=MODEL_PATH, n_ctx=max_seq_length, n_threads=2)

DOC_PATH = "/home/user/app/index"
RAG = RAGPretrainedModel.from_pretrained("AdrienB134/ColBERTv2.0-spanish-mmarcoES")
RAG = RAG.from_index(DOC_PATH, n_gpu=None)
# Throwaway query at startup -- presumably warms up the index so the first
# real request is not slowed down by lazy loading; confirm against RAGatouille.
RAG.search("init", None, k=1)


def reformat_rag(results_rag) -> list:
    """Return the ``"content"`` field of every search hit.

    Args:
        results_rag: iterable of result mappings as returned by
            ``RAG.search``, or ``None``.

    Returns:
        A list with one content string per hit, or ``[""]`` when
        *results_rag* is ``None`` so callers can always join the result.
    """
    if results_rag is None:
        return [""]
    return [result["content"] for result in results_rag]


def chat_stream_completion(message, history):
    """Stream an answer to *message*, grounded on the best-matching document.

    Args:
        message: the user's question.
        history: previous chat turns; accepted because Gradio's chat
            callback passes it, but not used when building the prompt.

    Yields:
        The answer accumulated so far, once per non-empty streamed chunk.
    """
    context = reformat_rag(RAG.search(message, None, k=1))
    context = " \n ".join(context)
    full_prompt = prompt.format(context, message, "")
    print(full_prompt)

    response = llm.create_completion(
        prompt=full_prompt,
        temperature=0.1,
        max_tokens=max_seq_length,
        stream=True,
    )

    message_repl = ""
    for chunk in response:
        text = chunk["choices"][0]["text"]
        # Empty chunks carry no new text, so they are skipped to avoid
        # re-sending an unchanged message to the UI.
        if text:
            message_repl += text
            yield message_repl


css = """
h1 {
    font-size: 32px;
    text-align: center;
}
h2 {
    text-align: center;
}
img {
    height: 750px; /* Reducing the image height */
}
"""


def launcher():
    """Build the two-column Gradio page (image + chat) and start the server."""
    with gr.Blocks(css=css) as demo:
        gr.Markdown("# Think Paraguayo")
        gr.Markdown("## Conoce la cultura guaraní.")
        with gr.Row(variant='panel'):
            with gr.Column(scale=1):
                gr.Image(value="think_paraguayo.jpeg", type="filepath", label="")
            with gr.Column(scale=1):
                # Retry/stop/undo buttons are disabled to keep the chat
                # panel minimal.
                gr.ChatInterface(
                    fn=chat_stream_completion,
                    retry_btn=None,
                    stop_btn=None,
                    undo_btn=None,
                ).queue()
    demo.launch()


if __name__ == "__main__":
    launcher()