"""Gradio app that turns a PDF into a Word document of quiz questions.

Pipeline: extract the PDF text, summarise it with a DistilBART checkpoint,
ask a Llama 3 model hosted on Together to write questions from the summary,
parse the reply with regular expressions, and save the result as a .docx file.
"""

import os
import re

import gradio as gr
from docx import Document
from langchain import LLMChain
from langchain import PromptTemplate
from langchain_together import Together
from PyPDF2 import PdfReader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# The Together API key must come from the environment, never from source
# control. A key was previously hard-coded at this point; it should be treated
# as leaked and revoked.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it before starting the app."
    )

checkpoint = "sshleifer/distilbart-cnn-12-6"
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# File name the generated quiz is written to (relative to the working dir).
OUTPUT_DOCX = "Physics_Questions.docx"

# Prompt sent to the LLM. The "\n" sequences inside the quoted format examples
# are real newlines in the final prompt; the parsers below rely on the model
# echoing that "Question: / Correct answer: / Incorrect answers:" layout.
MCQ_TEMPLATE = """
Generate 20 different questions based on the following summary:
{summary}

The difficulty level of the questions should be: {difficulty_level}

For the multiple-choice questions (MCQs), please provide the following for each question:
1. Question
   - Use varied question formats such as:
     - "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
   - Ensure questions are logically phrased and relevant to the content.
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: \nCorrect answer: \nIncorrect answers: , , "

For the short questions, please provide:
1. Question
   - Use varied question formats to encourage conceptual understanding and avoid repetition.
   - Ensure the short questions do not overlap in content with the MCQs.
2. Short, concise answer
3. Format: "SQ: \nAnswer: "

Generate 10 MCQs and 10 unique short questions in total, ensuring diverse question structures and logical phrasing.
"""

QUIZ_PROMPT = PromptTemplate(
    input_variables=['summary', 'difficulty_level'],
    template=MCQ_TEMPLATE,
)

# The leading "N." is optional because the prompt's format string does not ask
# for numbering, and each pattern accepts end-of-text as well as a newline so
# the final item is not lost when the reply has no trailing newline.
_MCQ_PATTERN = re.compile(
    r'(?:\d+\.\s*)?Question:\s*(.*?)\nCorrect answer:\s*(.*?)'
    r'\nIncorrect answers:\s*(.*?)(?:\n|$)',
    re.DOTALL,
)
_SHORT_QUESTION_PATTERN = re.compile(
    r'(?:\d+\.\s*)?SQ:\s*(.*?)(?:\n|$)',
    re.DOTALL,
)


def Summary_BART(text: str) -> str:
    """Return a summary of *text* produced by the DistilBART checkpoint.

    The input is truncated to the first 1024 tokens, so anything in *text*
    beyond that limit does not influence the summary.
    """
    inputs = tokenizer(
        text, max_length=1024, truncation=True, return_tensors="pt"
    )
    summary_ids = model.generate(inputs["input_ids"])
    decoded = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]


def _extract_pdf_text(file) -> str:
    """Concatenate the extractable text of every page in the PDF *file*."""
    reader = PdfReader(file)
    # Guard against pages that yield no text (e.g. scanned images) so a
    # None/empty result cannot break the concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _parse_quiz_response(response_text: str) -> tuple:
    """Split the LLM reply into ``(mcqs, short_questions)``.

    ``mcqs`` is a list of ``(question, correct_answer, incorrect_answers)``
    string tuples; ``short_questions`` is a list of question strings.
    """
    mcqs = _MCQ_PATTERN.findall(response_text)
    short_questions = _SHORT_QUESTION_PATTERN.findall(response_text)
    return mcqs, short_questions


def _build_quiz_document(mcqs: list, short_questions: list):
    """Build the Word document holding the MCQ and short-question sections."""
    doc = Document()
    doc.add_heading("Physics Questions", level=1)

    # Section 1: multiple-choice questions with their options.
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(
        mcqs, start=1
    ):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always written as option A, so
        # the document doubles as its own answer key. Shuffle the options
        # here if the file is meant to be handed to students.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        # Drop blank fragments so a stray ", " never produces an empty option.
        options = [
            opt.strip() for opt in incorrect_answers.split(', ') if opt.strip()
        ]
        # start=2 makes chr(64 + i) yield "B", "C", "D", ...
        for i, incorrect in enumerate(options, start=2):
            doc.add_paragraph(f"{chr(64 + i)}) {incorrect}", style="List Bullet")

    # Section 2: short questions on a fresh page.
    doc.add_page_break()
    doc.add_heading("Short Questions", level=2)
    for idx, question in enumerate(short_questions, start=1):
        doc.add_paragraph(f"{idx}. {question.strip()}", style="Body Text")

    return doc


def DocToQuizz(file, difficulty_level):
    """Generate a quiz document from a PDF and return the saved file's path.

    Args:
        file: Path (or any object ``PdfReader`` accepts) of the source PDF.
        difficulty_level: Difficulty label inserted into the LLM prompt.

    Returns:
        The name of the .docx file that was written (``OUTPUT_DOCX``).

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    text = _extract_pdf_text(file)
    if not text.strip():
        raise ValueError(
            "No extractable text found in the PDF; cannot generate a quiz."
        )

    summary = Summary_BART(text)

    chain = LLMChain(llm=llama3, prompt=QUIZ_PROMPT)
    response = chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
    })
    mcqs, short_questions = _parse_quiz_response(response['text'])

    # NOTE: every request writes to the same file name, so concurrent users
    # overwrite each other's output.
    _build_quiz_document(mcqs, short_questions).save(OUTPUT_DOCX)
    return OUTPUT_DOCX


# Hard-coded list of PDFs offered in the UI; the files are expected to be
# resolvable from the working directory.
pdf_files = [
    'output_range_1.pdf',
    'output_range_2.pdf',
    'output_range_3.pdf',
    'output_range_4.pdf',
    'output_range_5.pdf',
    'output_range_6.pdf',
    'output_range_7.pdf',
    'output_range_8.pdf',
    'output_range_9.pdf',
]
difficulty_levels = ["Easy", "Medium", "Hard"]


# Gradio Interface
def generate_quiz(file, difficulty_level):
    """Gradio callback: build the quiz and return the path of the .docx."""
    return DocToQuizz(file, difficulty_level)


interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        # The default PDF belongs on this dropdown; it was previously attached
        # to the difficulty dropdown, where it is not a valid choice.
        gr.Dropdown(pdf_files, label="Select PDF File", value=pdf_files[0]),
        gr.Dropdown(
            difficulty_levels,
            label="Select Difficulty Level",
            value=difficulty_levels[0],
        ),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Select a PDF file and difficulty level to generate quiz questions.",
)

# Launch the interface
interface.launch(debug=True)