import sys

import gradio as gr
from ollama import Client

# Ollama host can be passed as the first CLI argument; default to the local server.
host_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:11434/"
client = Client(host=host_url)

# Ask the server which models are installed and collect their names for the dropdown.
model_list = client.list()
model_names = [model['model'] for model in model_list['models']]


def chat_ollama(user_input, history, model_name):
    """Stream a chat completion from Ollama, replaying the conversation so far."""
    # With type="messages", Gradio passes history as a list of {'role', 'content'}
    # dicts, which maps directly onto Ollama's message format. The original code
    # ignored history, so the model had no memory of earlier turns.
    messages = [{'role': m['role'], 'content': m['content']} for m in history]
    messages.append({'role': 'user', 'content': user_input})

    stream = client.chat(
        model=model_name,
        messages=messages,
        stream=True,
    )

    # Accumulate streamed chunks and yield the growing reply so the UI
    # updates token by token.
    partial_message = ""
    for chunk in stream:
        content = chunk['message']['content']
        if content:
            partial_message += content
            yield partial_message


with gr.Blocks(title="Ollama Chat", fill_height=True) as demo:
    gr.Markdown("# Ollama Chat")
    # Prefer llama3.1 if installed, otherwise fall back to the first available
    # model rather than hardcoding a name the server may not have.
    default_model = ("llama3.1:latest" if "llama3.1:latest" in model_names
                     else (model_names[0] if model_names else None))
    # Renamed from model_list to avoid shadowing the client.list() result above.
    model_dropdown = gr.Dropdown(model_names, value=default_model, label="Model",
                                 info="Model to chat with")
    gr.ChatInterface(chat_ollama, type="messages", additional_inputs=model_dropdown)

if __name__ == "__main__":
    demo.launch()
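
# Usage (a sketch; the filename ollama_chat.py is an assumption, and an Ollama
# server with at least one pulled model must be reachable):
#
#   pip install ollama gradio
#   python ollama_chat.py                       # talks to http://localhost:11434/
#   python ollama_chat.py http://remote:11434   # or point at another Ollama host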