"""Gradio app that turns documents, raw text or a web page into exam questions.

Text is extracted from uploaded PDF / PPTX / DOCX files, an optional raw-text
box and an optional URL, then sent to a Groq-hosted chat model which is asked
for multiple choice and subjective questions with answers.
"""

import os

import fitz  # PyMuPDF for PDF extraction
import gradio as gr
import requests
from bs4 import BeautifulSoup
from docx import Document  # For Word files
from groq import Groq  # Assuming Groq is available as a Python library
from pptx import Presentation

# Initialize Groq Client using environment variable
api_key = os.getenv('MCQs')  # Retrieve API key from environment
if api_key:
    client = Groq(api_key=api_key)
else:
    raise ValueError("API Key not found in environment variables!")

# Upper bounds enforced on the requested question counts (mirrored by the sliders).
MAX_MCQS = 40
MAX_SUBJECTIVE = 20

# Without a timeout requests.get() can block forever on an unresponsive host.
URL_TIMEOUT_SECONDS = 30

# NOTE(review): confirm this model id is still served by Groq.
GROQ_MODEL = "llama3-8b-8192"

# Header the model is asked to emit so the response can be split in two.
SUBJECTIVE_HEADER = "**Subjective Questions**"

DIFFICULTY_LEVELS = {
    "easy": "simple questions with direct answers.",
    "medium": "moderate complexity questions requiring reasoning.",
    "hard": "challenging questions requiring deep understanding.",
}


# Step 1: File Extraction Functions
def _file_path(file):
    """Return the filesystem path of an uploaded file.

    The File input below is declared with ``type="filepath"``, in which case
    the callback receives path strings rather than objects with a ``.name``
    attribute. Both shapes are accepted so the extractors work either way.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: A path string or an object whose ``name`` attribute is the path.

    Returns:
        The extracted text, or an ``"Error reading PDF: ..."`` message if
        anything goes wrong (errors are reported as text, never raised).
    """
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(_file_path(file)) as pdf_file:
            return "".join(page.get_text() for page in pdf_file)
    except Exception as e:
        return f"Error reading PDF: {e}"


def extract_text_from_ppt(file):
    """Return the text of every text-bearing shape in a PPTX, one per line.

    Args:
        file: A path string or an object whose ``name`` attribute is the path.

    Returns:
        The extracted text, or an ``"Error reading PPT: ..."`` message.
    """
    try:
        presentation = Presentation(_file_path(file))
        return "".join(
            shape.text + "\n"
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, 'text')
        )
    except Exception as e:
        return f"Error reading PPT: {e}"


def extract_text_from_word(file):
    """Return the text of every paragraph in a DOCX, one per line.

    Args:
        file: A path string or an object whose ``name`` attribute is the path.

    Returns:
        The extracted text, or an ``"Error reading Word file: ..."`` message.
    """
    try:
        document = Document(_file_path(file))
        return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
    except Exception as e:
        return f"Error reading Word file: {e}"


def extract_text_from_url(url):
    """Download a web page and return its visible text, one node per line.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text, or an ``"Error reading URL: ..."`` message on network
        failure, timeout or a non-success HTTP status.
    """
    try:
        response = requests.get(url, timeout=URL_TIMEOUT_SECONDS)
        # Treat 4xx/5xx as errors instead of quizzing the user on an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.get_text(separator="\n", strip=True)
    except Exception as e:
        return f"Error reading URL: {e}"


# Extension (lower-case) -> extractor for that format.
_EXTRACTORS = {
    ".pdf": extract_text_from_pdf,
    ".pptx": extract_text_from_ppt,
    ".docx": extract_text_from_word,
}


def _extractor_for(file):
    """Return the extractor matching the file's extension, or None."""
    extension = os.path.splitext(_file_path(file))[1].lower()
    return _EXTRACTORS.get(extension)


def process_files(files, url):
    """Extract and concatenate text from uploaded files and an optional URL.

    Args:
        files: Iterable of uploaded files (paths or objects with ``name``);
            ``None`` is treated as no files.
        url: Optional page address; skipped when empty.

    Returns:
        The concatenated text. Unsupported files contribute an
        ``"Unsupported file format: <name>"`` line instead of content.
    """
    parts = []
    for file in files or []:
        extractor = _extractor_for(file)
        if extractor is None:
            # Inform user of unsupported format
            parts.append("Unsupported file format: " + _file_path(file) + "\n")
        else:
            parts.append(extractor(file))
    if url:
        parts.append(extract_text_from_url(url))
    return "".join(parts)


# Step 2: Generate MCQs and Subjective Questions Using Groq
def _is_subjective_header(line):
    """Tell whether a response line starts the subjective-question section.

    Markdown emphasis/heading characters and letter case are ignored so that
    variants such as ``## Subjective Questions:`` are recognised as well as
    the exact ``**Subjective Questions**`` header.
    """
    normalized = line.strip().strip("*#: ").lower()
    return normalized.startswith("subjective question")


def _split_questions(response):
    """Split a model response into (mcq_text, subjective_text).

    Everything before the first subjective header goes to the MCQ text; the
    header and everything after it go to the subjective text. Each line is
    re-emitted with a trailing newline.
    """
    mcq_lines = []
    subjective_lines = []
    in_subjective = False
    for line in response.split("\n"):
        if not in_subjective and _is_subjective_header(line):
            in_subjective = True
        (subjective_lines if in_subjective else mcq_lines).append(line + "\n")
    return "".join(mcq_lines), "".join(subjective_lines)


def generate_questions(text, num_mcqs=5, num_subjective=2, difficulty_mcqs="medium", difficulty_subjective="medium"):
    """Ask the Groq chat model for questions about ``text``.

    Args:
        text: Source material the questions should be drawn from.
        num_mcqs: Requested number of MCQs, capped at ``MAX_MCQS``.
        num_subjective: Requested number of subjective questions, capped at
            ``MAX_SUBJECTIVE``.
        difficulty_mcqs: ``"easy"``, ``"medium"`` or ``"hard"``; any other
            value falls back to the medium description.
        difficulty_subjective: Same choices, for the subjective questions.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. On any failure the first
        element is an ``"Error generating questions: ..."`` message and the
        second is empty.
    """
    try:
        # Sliders may deliver floats; the prompt should read "5", not "5.0".
        num_mcqs = min(int(num_mcqs), MAX_MCQS)  # Limit MCQs to 40
        num_subjective = min(int(num_subjective), MAX_SUBJECTIVE)  # Limit Subjective Questions to 20

        # Unknown difficulty names fall back to the *description* of medium,
        # so the prompt sentence stays well-formed.
        medium = DIFFICULTY_LEVELS["medium"]
        mcq_style = DIFFICULTY_LEVELS.get(difficulty_mcqs, medium)
        subjective_style = DIFFICULTY_LEVELS.get(difficulty_subjective, medium)

        prompt = (
            f"Generate {num_mcqs} multiple choice questions and {num_subjective} subjective questions "
            f"from the following text: {text}. Include the correct answers for each question. "
            f"The questions should be {mcq_style} for MCQs and {subjective_style} for Subjective questions. "
            # The splitter relies on this header, so request it explicitly.
            f"List all multiple choice questions first, then start the subjective section "
            f"with a line containing exactly: {SUBJECTIVE_HEADER}"
        )

        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=GROQ_MODEL,
        )
        response = chat_completion.choices[0].message.content.strip()
        return _split_questions(response)
    except Exception as e:
        return f"Error generating questions: {e}", ""


# Step 3: Gradio Interface
def process_and_generate(files, raw_text, url, num_mcqs=5, num_subjective=2, difficulty_mcqs="medium", difficulty_subjective="medium"):
    """Gradio callback: gather all input text and generate questions from it.

    Args:
        files: Uploaded files (or ``None``); PDF, PPTX and DOCX are supported.
        raw_text: Optional text typed by the user.
        url: Optional page address to scrape.
        num_mcqs: Requested number of MCQs.
        num_subjective: Requested number of subjective questions.
        difficulty_mcqs: Difficulty name for the MCQs.
        difficulty_subjective: Difficulty name for the subjective questions.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings for the two output boxes.
        When no input yields any text the first element is an explanatory
        message and the second is empty.
    """
    parts = []

    # Extract and process all input files
    for file in files or []:
        extractor = _extractor_for(file)
        if extractor is None:
            parts.append(f"Unsupported file format: {_file_path(file)}\n")
        else:
            parts.append(extractor(file) + "\n")

    # Append raw text input if provided
    if raw_text:
        parts.append(raw_text + "\n")

    # Extract text from URL if provided
    if url:
        parts.append(extract_text_from_url(url) + "\n")

    extracted_text = "".join(parts)

    # Check if there's any extracted text to process
    if not extracted_text.strip():
        return "No valid input provided to generate questions.", ""

    # Generate questions from the extracted text
    return generate_questions(extracted_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective)


# Gradio Inputs
inputs = [
    gr.File(file_count="multiple", type="filepath"),  # Corrected to 'filepath'
    gr.Textbox(lines=2, placeholder="Enter raw text here...", label="Raw Text (Optional)"),
    gr.Textbox(lines=1, placeholder="Enter URL here...", label="URL (Optional)"),
    gr.Slider(minimum=1, maximum=40, value=5, step=1, label="Number of MCQs (Max 40)"),
    gr.Slider(minimum=1, maximum=20, value=2, step=1, label="Number of Subjective Questions (Max 20)"),
    gr.Radio(["easy", "medium", "hard"], label="Select Difficulty Level for MCQs", value="medium"),
    gr.Radio(["easy", "medium", "hard"], label="Select Difficulty Level for Subjective Questions", value="medium"),
]

# Outputs (Separate outputs for MCQs and Subjective Questions)
mcq_output = gr.Textbox(label="Generated MCQs", lines=10)  # Display MCQs in a larger box
subjective_output = gr.Textbox(label="Generated Subjective Questions", lines=10)  # Display subjective questions in a larger box

# Launch Gradio Interface with separate outputs
gr.Interface(
    fn=process_and_generate,
    inputs=inputs,
    outputs=[mcq_output, subjective_output],  # Only MCQs and Subjective Questions
    live=False,
    title="MCQ & Subjective Question Generator",
    theme="default",
).launch(share=True)  # Share link for easy access