# -*- coding: utf-8 -*-
"""NLP - GRADIO.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1mrV6DhKMqFwtVE1E89DygsKexajnb60N
"""

import gradio as gr
import torch
from transformers import pipeline

# Build the model pipeline once at import time; recreating it inside the
# handler would reload the 1.1B-parameter checkpoint on every request.
# bfloat16 follows the TinyLlama model card; on a CPU-only runtime,
# dropping torch_dtype (defaulting to float32) may be safer.
device = 0 if torch.cuda.is_available() else -1  # pipeline() takes a GPU index, or -1 for CPU
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16,
    device=device,
)

def generate_chat_response(query):

    # Define system message to indicate context
    system_message = "You are a medical chatbot who is answering questions about cancer. Please be considerate."

    # Build the conversation in the role/content message format expected
    # by the model's chat template
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": query},
    ]

    # Render the messages into the model's prompt format and append the
    # assistant header so the model continues as the assistant
    prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
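
    # For reference, TinyLlama's chat template renders these messages into a
    # Zephyr-style prompt of roughly this shape (a sketch, not exact output):
    #
    #   <|system|>
    #   You are a medical chatbot ...</s>
    #   <|user|>
    #   <the user's question></s>
    #   <|assistant|>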

    # Generate response
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)

    # The pipeline echoes the prompt at the start of generated_text,
    # so strip it and return only the model's reply as a string
    generated_text = outputs[0]["generated_text"]
    return generated_text[len(prompt):].strip()
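
# Optional quick smoke test before wiring up the UI (hypothetical query text):
#
#   print(generate_chat_response("What are common early symptoms of lung cancer?"))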

# Define Gradio interface
demo = gr.Interface(
    fn=generate_chat_response,
    inputs="text",
    outputs="text",
    title="Medical Chatbot",
    description="Enter your question about cancer to interact with the medical chatbot."
)
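
# A multi-turn alternative (a sketch, not part of the original app):
# gr.ChatInterface keeps the conversation history and calls fn(message, history).
# generate_chat_response is single-turn, so the history is ignored here:
#
#   chat_demo = gr.ChatInterface(
#       fn=lambda message, history: generate_chat_response(message),
#       title="Medical Chatbot",
#   )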

# Launch Gradio interface
demo.launch()
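
# demo.launch() serves the app locally and renders inline in a Colab notebook.
# To expose a temporary public URL (useful when sharing from Colab), Gradio
# supports the share flag:
#
#   demo.launch(share=True)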