import re

import gradio as gr
import pandas as pd
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline, T5Tokenizer, T5ForConditionalGeneration

# Checkpoint identifiers for the two summarization models. Everything below is
# loaded once, at import time, and kept in module-level globals.
_BART_CHECKPOINT = 'facebook/bart-large-cnn'
_T5_CHECKPOINT = 't5-small'

# BART tokenizer/model pair (wrapped by the summarization pipeline further down)
bart_tokenizer = BartTokenizer.from_pretrained(_BART_CHECKPOINT)
bart_model = BartForConditionalGeneration.from_pretrained(_BART_CHECKPOINT)

# T5 tokenizer/model pair (driven directly through encode/generate/decode)
t5_tokenizer = T5Tokenizer.from_pretrained(_T5_CHECKPOINT)
t5_model = T5ForConditionalGeneration.from_pretrained(_T5_CHECKPOINT)

# Summarization pipeline built on the already-loaded BART objects
summarizer = pipeline(
    "summarization",
    model=bart_model,
    tokenizer=bart_tokenizer,
)

# Healthcare and AI keyword lists
healthcare_keywords = ["disease", "cancer", "patient", "treatment", "health", "illness", "medicine", "symptom", "diagnosis", "epidemic", "infection"]
ai_keywords = ["algorithm", "artificial intelligence", "machine learning", "neural network", "AI", "model", "deep learning", "prediction", "data"]


def _mentions_any(text, keywords):
    """Return True when *text* mentions at least one entry of *keywords*.

    Matching is case-insensitive. Ordinary keywords match as substrings so
    inflected or compound forms ("patients", "healthcare") still count.
    All-uppercase keywords are treated as acronyms and must appear as a whole
    word; otherwise "AI" would match inside "pain", "maintain" or "training"
    and almost any text would be flagged.
    """
    lowered = text.lower()
    for keyword in keywords:
        needle = keyword.lower()
        if keyword.isupper():
            if re.search(rf"\b{re.escape(needle)}\b", lowered):
                return True
        elif needle in lowered:
            return True
    return False


# Function to classify the domain (Healthcare, AI, or both)
def classify_domain(title, abstract):
    """Label a paper as healthcare, AI, both, or neither from its text.

    Args:
        title: Paper title. Non-string values (such as None or NaN) are ignored.
        abstract: Paper abstract. Non-string values are ignored.

    Returns:
        "Healthcare, AI" when both keyword lists match, "HealthCare" or "AI"
        when only one does, and "General" otherwise.
    """
    # Join with a space so the last word of the title cannot fuse with the
    # first word of the abstract and hide (or fabricate) a keyword.
    text = " ".join(part for part in (title, abstract) if isinstance(part, str))

    healthcare_detected = _mentions_any(text, healthcare_keywords)
    ai_detected = _mentions_any(text, ai_keywords)

    if healthcare_detected and ai_detected:
        return "Healthcare, AI"  # Both healthcare and AI
    if healthcare_detected:
        return "HealthCare"
    if ai_detected:
        return "AI"
    return "General"

# Function to generate extractive summaries using BART
def extractive_summary(text):
    """Summarize *text* with the module-level BART summarization pipeline.

    NOTE(review): despite the name, the pipeline wraps a conditional-generation
    model, so the output is presumably generated text rather than sentences
    copied verbatim from the input - confirm before relying on "extractive".

    Args:
        text: Text to condense.

    Returns:
        The ``summary_text`` of the first pipeline result, or ``""`` when
        *text* is empty or whitespace-only.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # truncation=True makes the tokenizer cut inputs that exceed the model's
    # maximum input length instead of passing an over-long sequence through.
    summary = summarizer(text, max_length=150, min_length=50, do_sample=False, truncation=True)
    return summary[0]['summary_text']

# Healthcare Agent to enhance healthcare-related content (focusing on diseases and treatments)
def healthcare_agent(abstract):
    """Summarize the sentences of *abstract* that mention a healthcare keyword.

    Args:
        abstract: Abstract text. Non-string values (such as None or NaN) are
            treated as containing no healthcare content.

    Returns:
        The summary produced by ``extractive_summary`` for the matching
        sentences, or "Not related to Healthcare" when no sentence matches.
    """
    if not isinstance(abstract, str):
        return "Not related to Healthcare"

    # Split after sentence-ending punctuation that is followed by whitespace.
    # This keeps the punctuation attached to each sentence and leaves decimals
    # such as "0.05" intact, unlike a bare split on ".".
    sentences = re.split(r"(?<=[.!?])\s+", abstract.strip())

    relevant_sentences = [
        sentence
        for sentence in sentences
        if any(keyword.lower() in sentence.lower() for keyword in healthcare_keywords)
    ]

    # If healthcare-related sentences are found, generate a summary
    if not relevant_sentences:
        return "Not related to Healthcare"
    return extractive_summary(" ".join(relevant_sentences))

# AI Agent to enhance AI-related content (focusing on algorithms and machine learning)
def ai_agent(abstract):
    """Summarize the sentences of *abstract* that mention an AI keyword.

    Args:
        abstract: Abstract text. Non-string values (such as None or NaN) are
            treated as containing no AI content.

    Returns:
        The summary produced by ``extractive_summary`` for the matching
        sentences, or "Not related to AI" when no sentence matches.
    """
    if not isinstance(abstract, str):
        return "Not related to AI"

    def mentions_ai(sentence):
        """Return True when *sentence* contains any AI keyword, ignoring case."""
        lowered = sentence.lower()
        for keyword in ai_keywords:
            needle = keyword.lower()
            if keyword.isupper():
                # Acronyms such as "AI" must stand alone as a word; a plain
                # substring test would also hit "training", "obtain", etc.
                if re.search(rf"\b{re.escape(needle)}\b", lowered):
                    return True
            elif needle in lowered:
                return True
        return False

    # Split after sentence-ending punctuation that is followed by whitespace.
    # This keeps the punctuation attached to each sentence and leaves decimals
    # such as "0.95" intact, unlike a bare split on ".".
    sentences = re.split(r"(?<=[.!?])\s+", abstract.strip())
    relevant_sentences = [sentence for sentence in sentences if mentions_ai(sentence)]

    if not relevant_sentences:
        return "Not related to AI"
    return extractive_summary(" ".join(relevant_sentences))

# Function to generate general summary which is paraphrased (using T5 model for rephrasing)
def generate_general_summary(abstract):
    """Produce a general summary of *abstract* with the module-level T5 model.

    The abstract is prefixed with "summarize: ", encoded to PyTorch tensors
    (truncated to at most 512 tokens), and passed to ``t5_model.generate``
    with 4 beams and an output limit of 150 tokens.

    Args:
        abstract: Text of the abstract to summarize.

    Returns:
        The first generated sequence decoded to text, special tokens removed.
    """
    prompt = f"summarize: {abstract}"

    encoded_prompt = t5_tokenizer.encode(
        prompt,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    generation_options = {
        "max_length": 150,
        "num_beams": 4,
        "length_penalty": 2.0,
        "early_stopping": True,
    }
    generated = t5_model.generate(encoded_prompt, **generation_options)

    return t5_tokenizer.decode(generated[0], skip_special_tokens=True)

# Function to generate collaborative insights between healthcare and AI
def generate_collaborative_insights(abstract, title, domain):
    """Build the general, healthcare, AI and collaborative summaries for a paper.

    All three summarizers are invoked on the abstract regardless of *domain*;
    *domain* only decides which of their outputs are reported.

    Args:
        abstract: Text of the abstract.
        title: Paper title (accepted for interface symmetry; not used here).
        domain: Label as returned by ``classify_domain``.

    Returns:
        A 4-tuple ``(general_summary, healthcare_summary, ai_summary,
        collaborative_summary)``.
    """
    overview = generate_general_summary(abstract)
    healthcare_view = healthcare_agent(abstract)
    ai_view = ai_agent(abstract)

    # A paper outside both domains reports fixed placeholders everywhere
    # except the general summary.
    if domain == "General":
        return (
            overview,
            "Not related to Healthcare",
            "Not related to AI",
            "Not related to both Healthcare and AI",
        )

    # The combined text is only produced when both domains were detected; the
    # agent outputs are interpolated verbatim.
    if domain == "Healthcare, AI":
        combined_view = f"Collaborative Insights between Healthcare and AI: {healthcare_view} {ai_view}"
    else:
        combined_view = "Not related to both Healthcare and AI"

    return overview, healthcare_view, ai_view, combined_view

# Function to process a single abstract
def process_single_abstract(title, abstract):
    """Classify one paper and return its four summaries.

    Args:
        title: Paper title.
        abstract: Paper abstract.

    Returns:
        The 4-tuple ``(general_summary, healthcare_summary, ai_summary,
        collaborative_summary)`` from ``generate_collaborative_insights``.
    """
    detected_domain = classify_domain(title, abstract)
    # Note the argument order expected by the callee: abstract first, then title.
    summaries = generate_collaborative_insights(abstract, title, detected_domain)
    return summaries

# Function to process a CSV file
def process_csv(file):
    """Summarize every paper listed in a CSV file and write the results to disk.

    Args:
        file: The uploaded CSV, given either as a filesystem path or as an
            object exposing that path through a ``name`` attribute.

    Returns:
        The path of the results file ("processed_results.csv", written to the
        current working directory), or an error message string when the input
        lacks a 'Title' or 'Abstract' column.
    """
    # Accept both a plain path and a wrapper object carrying the path in .name.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)

    # Ensure the required columns are present
    if any(column not in df.columns for column in ('Title', 'Abstract')):
        return "CSV file must contain 'Title' and 'Abstract' columns."

    # Missing cells come back as NaN and numeric cells as numbers; normalise
    # both columns to plain strings so the text-processing helpers never see
    # a non-string value.
    titles = df['Title'].fillna("").astype(str)
    abstracts = df['Abstract'].fillna("").astype(str)

    result_columns = [
        'Title',
        'Abstract',
        'Domain',
        'General Summary',
        'HealthCare Summary',
        'AI Summary',
        'Collaborative Summary',
    ]

    # Process each row in the CSV file
    results = []
    for title, abstract in zip(titles, abstracts):
        # Classify the domain (Healthcare, AI, or both)
        domain = classify_domain(title, abstract)

        # Generate summaries
        general_summary, healthcare_summary, ai_summary, collaborative_summary = generate_collaborative_insights(abstract, title, domain)

        # Store the results
        results.append({
            'Title': title,
            'Abstract': abstract,
            'Domain': domain,
            'General Summary': general_summary,
            'HealthCare Summary': healthcare_summary,
            'AI Summary': ai_summary,
            'Collaborative Summary': collaborative_summary,
        })

    # Naming the columns explicitly keeps the header row in the output even
    # when the input contains no data rows.
    result_df = pd.DataFrame(results, columns=result_columns)

    # Save to CSV and return path
    output_file = "processed_results.csv"
    result_df.to_csv(output_file, index=False)
    return output_file

# Gradio UI components
def create_ui():
    """Build the two-tab Gradio interface and launch it.

    The "Single Abstract" tab runs ``process_single_abstract`` on a title and
    abstract typed by the user; the "CSV Upload" tab runs ``process_csv`` on an
    uploaded file and offers the result for download. The call does not return
    a value; it ends by invoking ``demo.launch()``.
    """
    with gr.Blocks() as demo:
        gr.Markdown("## Research Paper Summarization App")

        with gr.Tab("Single Abstract"):
            title_input = gr.Textbox(label="Paper Title", placeholder="Enter the paper title here")
            abstract_input = gr.Textbox(label="Paper Abstract", placeholder="Enter the paper abstract here", lines=5)

            # process_single_abstract returns four strings, so each one gets
            # its own output box; wiring them all to a single Textbox would
            # display the whole tuple as one stringified value.
            general_output = gr.Textbox(label="General Summary", lines=5)
            healthcare_output = gr.Textbox(label="Healthcare Summary", lines=5)
            ai_output = gr.Textbox(label="AI Summary", lines=5)
            collaborative_output = gr.Textbox(label="Collaborative Summary", lines=5)

            # Button to process single abstract
            submit_btn_single = gr.Button("Process Abstract")
            submit_btn_single.click(
                process_single_abstract,
                inputs=[title_input, abstract_input],
                outputs=[general_output, healthcare_output, ai_output, collaborative_output],
            )

        with gr.Tab("CSV Upload"):
            file_input = gr.File(label="Upload CSV file", file_types=[".csv"])
            output_file = gr.File(label="Download Processed Results")

            # Button to process CSV file
            submit_btn_csv = gr.Button("Process CSV")
            submit_btn_csv.click(process_csv, inputs=file_input, outputs=output_file)

    demo.launch()

# Create the Gradio UI and launch it. This call sits at module level with no
# `if __name__ == "__main__"` guard, so it runs whenever the file is executed
# or imported.
create_ui()