import gradio as gr
from llama_cpp import Llama

# Load the GGUF model weights from the Hugging Face Hub
llm = Llama.from_pretrained(
    repo_id="uonlp/Vistral-7B-Chat-gguf",
    filename="ggml-vistral-7B-chat-f16.gguf",
)

# Define the function to interact with the model
def chat_with_model(user_input):
    # llama-cpp-python exposes an OpenAI-style chat completion API
    response = llm.create_chat_completion(
        messages=[
            {"role": "user", "content": user_input}
        ]
    )
    return response["choices"][0]["message"]["content"]

# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_model,
    inputs="text",
    outputs="text",
    title="QA-medical Chatbot",
    description="Ask the model any medical question!"
)

# Launch the interface
if __name__ == "__main__":
    iface.launch()
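
# Setup note (a sketch of the assumed environment, not part of the original script):
# the script depends on gradio and llama-cpp-python, and Llama.from_pretrained
# additionally requires the huggingface-hub package to download the GGUF file:
#   pip install gradio llama-cpp-python huggingface-hub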