import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together
import re
from docx import Document
import os

# Together API credentials.
# SECURITY: the key is read from the TOGETHER_API_KEY environment variable
# (shell export, .env loader, or the hosting platform's secret store) and must
# never be written into this file. Any key that has ever been committed to
# version control should be treated as leaked: revoke it and issue a new one.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set. Export your Together API key in the "
        "environment before starting the application."
    )

# Hugging Face checkpoint used for the local summarisation step.
checkpoint = "sshleifer/distilbart-cnn-12-6"
# Remote chat model (via Together) that writes the quiz from the summary.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
# Tokenizer and seq2seq model are loaded once at import time and shared by
# every call to Summary_BART.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
def Summary_BART(text):
    """Summarise *text* with the module-level seq2seq ``model``.

    The input is tokenized with truncation at 1024 tokens, so anything past
    that limit is not seen by the model. Returns the first decoded sequence
    as a string.
    """
    encoded = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    )
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]

# Patterns for pulling questions out of the LLM reply.
# - The leading "N." is optional: the prompt's requested format does not
#   include numbering, so a reply that follows it exactly must still parse.
# - Each item may end at a newline OR at the end of the reply, so the final
#   item is not lost when the reply has no trailing newline.
_MCQ_PATTERN = re.compile(
    r'(?:\d+\.\s*)?Question:\s*(.*?)\nCorrect answer:\s*(.*?)\nIncorrect answers:\s*(.*?)(?:\n|\Z)',
    re.DOTALL,
)
_SHORT_QUESTION_PATTERN = re.compile(
    r'(?:\d+\.\s*)?SQ:\s*(.*?)(?:\n|\Z)',
    re.DOTALL,
)


def _extract_pdf_text(file):
    """Return the concatenated text of every page of the PDF at *file*."""
    reader = PdfReader(file)
    # ``or ""`` keeps the join safe if the extractor yields nothing for a page.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _parse_quiz_response(response_text):
    """Split the LLM reply into MCQs and short questions.

    Returns ``(mcqs, short_questions)`` where ``mcqs`` is a list of
    ``(question, correct_answer, [incorrect, ...])`` tuples and
    ``short_questions`` is a list of question strings, all stripped.
    """
    mcqs = []
    for question, correct, incorrect in _MCQ_PATTERN.findall(response_text):
        # Split on ", " (not a bare comma) so values such as "1,000 m" stay whole.
        options = [opt.strip() for opt in incorrect.split(', ') if opt.strip()]
        mcqs.append((question.strip(), correct.strip(), options))
    short_questions = [
        question.strip()
        for question in _SHORT_QUESTION_PATTERN.findall(response_text)
    ]
    return mcqs, short_questions


def _write_quiz_document(mcqs, short_questions, output_path):
    """Write the parsed questions to a Word document saved at *output_path*."""
    doc = Document()
    doc.add_heading("Physics Questions", level=1)

    # Section 1: multiple-choice questions with lettered options.
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question}", style="List Number")
        # NOTE(review): the correct answer is always written as option A, so
        # the document doubles as its own answer key. Shuffle the options (and
        # emit a separate key) if this is handed to students as-is.
        doc.add_paragraph(f"A) {correct_answer}", style="List Bullet")
        for offset, incorrect in enumerate(incorrect_answers, start=1):
            letter = chr(ord("A") + offset)  # B, C, D, ...
            doc.add_paragraph(f"{letter}) {incorrect}", style="List Bullet")

    # Section 2: short questions on a fresh page (answers are not written).
    doc.add_page_break()
    doc.add_heading("Short Questions", level=2)
    for idx, question in enumerate(short_questions, start=1):
        doc.add_paragraph(f"{idx}. {question}", style="Body Text")

    doc.save(output_path)


def DocToQuizz(file, difficulty_level):
    """Generate a quiz Word document from a PDF.

    The PDF text is extracted, summarised with ``Summary_BART``, and the
    summary is sent to the ``llama3`` LLM with a prompt asking for 10 MCQs and
    10 short questions. The reply is parsed and written to
    ``Physics_Questions.docx`` in the current working directory.

    Args:
        file: Path (or file object) of the PDF, as accepted by ``PdfReader``.
        difficulty_level: Difficulty label inserted into the prompt.

    Returns:
        The path of the saved document, ``"Physics_Questions.docx"``.

    Raises:
        ValueError: If no text could be extracted from the PDF (for example a
            scanned, image-only document), since there is nothing to summarise.
    """
    # Read the PDF content
    text = _extract_pdf_text(file)
    if not text.strip():
        raise ValueError(f"No extractable text found in PDF: {file!r}")
    summary = Summary_BART(text)

    # Define the prompt template for generating questions
    mcq_template = """
    Generate 20 different questions based on the following summary: {summary}
    The difficulty level of the questions should be: {difficulty_level}

    For the multiple-choice questions (MCQs), please provide the following for each question:
    1. Question
       - Use varied question formats such as:
         - "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
       - Ensure questions are logically phrased and relevant to the content.
    2. Correct answer
    3. Three plausible incorrect answer options
    4. Format: "Question: <question text>\nCorrect answer: <correct answer>\nIncorrect answers: <option1>, <option2>, <option3>"

    For the short questions, please provide:
    1. Question
       - Use varied question formats to encourage conceptual understanding and avoid repetition.
       - Ensure the short questions do not overlap in content with the MCQs.
    2. Short, concise answer
    3. Format: "SQ: <question text>\nAnswer: <answer>"

    Generate 10 MCQs and 10 unique short questions in total, ensuring diverse question structures and logical phrasing.
    """
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template
    )

    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)

    response = Generated_mcqs.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level
    })

    # Extract MCQs and Short Questions from the raw reply text
    mcqs, short_questions = _parse_quiz_response(response['text'])

    # Build and save the Word document
    output_path = "Physics_Questions.docx"
    _write_quiz_document(mcqs, short_questions, output_path)
    return output_path

# PDF documents offered in the UI. This is a fixed list of nine file names,
# output_range_1.pdf through output_range_9.pdf; the directory is not scanned.
pdf_files = [f"output_range_{index}.pdf" for index in range(1, 10)]
# Difficulty labels offered in the UI.
difficulty_levels = "Easy Medium Hard".split()

# Gradio Interface
def generate_quiz(file, difficulty_level):
    """Gradio callback: build the quiz for *file* and return the document path."""
    return DocToQuizz(file, difficulty_level)

# Each dropdown is given an explicit default taken from its OWN choice list,
# so the form can be submitted untouched and the callback always receives a
# valid PDF name and a valid difficulty label.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        gr.Dropdown(pdf_files, label="Select PDF File", value=pdf_files[0]),
        gr.Dropdown(
            difficulty_levels,
            label="Select Difficulty Level",
            value=difficulty_levels[0],
        ),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Select a PDF file and difficulty level to generate quiz questions."
)

# Launch the interface
interface.launch(debug=True)