"""Gradio app that generates MCQs and subjective questions from documents.

Text is extracted from uploaded PDF, PowerPoint and Word files (and/or taken
from a raw text box), sent to a Groq-hosted chat model, and the model's answer
is split into an MCQ section and a subjective-question section.
"""

import os
from typing import Callable, Dict, Optional, Sequence, Tuple

import fitz  # PyMuPDF for PDF extraction
import gradio as gr
from docx import Document  # For Word files
from groq import Groq  # Assuming Groq is available as a Python library
from pptx import Presentation

# Upper bounds shared by the UI sliders and generate_questions().
MAX_MCQS = 60
MAX_SUBJECTIVE = 20

# Heading that marks where the subjective section starts in the model reply.
SUBJECTIVE_MARKER = "**Subjective Questions**"

# Prompt fragments keyed by the values offered in the difficulty radios.
DIFFICULTY_LEVELS: Dict[str, str] = {
    "easy": "simple questions with direct answers.",
    "medium": "moderate complexity questions requiring reasoning.",
    "hard": "challenging questions requiring deep understanding.",
}

# Prompt fragments keyed by the values offered in the question-type radio.
QUESTION_TYPE_MAP: Dict[str, str] = {
    "reason": "Generate reasoning-based questions.",
    "short": "Generate short-answer questions.",
    "long": "Generate long-answer questions.",
    "case study": "Generate case study-based questions.",
    "mix": "Generate a mix of question types.",
}

# Initialize Groq Client (raises KeyError at import if the key is not set).
api_key = os.environ['GROQ_API_KEY']
client = Groq(api_key=api_key)


# File Extraction Functions
def extract_text_from_pdf(file_path: str) -> str:
    """Return the concatenated text of every page of the PDF at *file_path*.

    On any failure the function does not raise; it returns a string of the
    form ``"Error reading PDF: <exception>"`` instead.
    """
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(file_path) as pdf_file:
            return "".join(page.get_text() for page in pdf_file)
    except Exception as e:
        return f"Error reading PDF: {e}"


def extract_text_from_ppt(file_path: str) -> str:
    """Return the text of every text-bearing shape in a .pptx file.

    Each shape's text is followed by a newline. On any failure the function
    returns ``"Error reading PPT: <exception>"`` instead of raising.
    """
    try:
        presentation = Presentation(file_path)
        return "".join(
            shape.text + "\n"
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, 'text')
        )
    except Exception as e:
        return f"Error reading PPT: {e}"


def extract_text_from_word(file_path: str) -> str:
    """Return the text of every paragraph in a .docx file, one per line.

    Only ``document.paragraphs`` is read. On any failure the function returns
    ``"Error reading Word file: <exception>"`` instead of raising.
    """
    try:
        document = Document(file_path)
        return "".join(
            paragraph.text + "\n" for paragraph in document.paragraphs
        )
    except Exception as e:
        return f"Error reading Word file: {e}"


# Extractor to use for each supported (lower-cased) file extension.
_EXTRACTORS: Dict[str, Callable[[str], str]] = {
    ".pdf": extract_text_from_pdf,
    ".pptx": extract_text_from_ppt,
    ".docx": extract_text_from_word,
}


def process_files(file_paths: Sequence[str]) -> str:
    """Extract and concatenate the text of all files in *file_paths*.

    The extension is matched case-insensitively, so ``REPORT.PDF`` is handled
    like ``report.pdf``. Unsupported files contribute an
    ``"Unsupported file format: <path>"`` line. Extractor error messages are
    returned as ordinary text and therefore also end up in the result.
    """
    chunks = []
    for file_path in file_paths:
        extension = os.path.splitext(file_path)[1].lower()
        extractor = _EXTRACTORS.get(extension)
        if extractor is None:
            chunks.append(f"Unsupported file format: {file_path}\n")
        else:
            chunks.append(extractor(file_path))
    return "".join(chunks)


def _split_sections(response: str) -> Tuple[str, str]:
    """Split a model reply into (mcqs, subjective) at SUBJECTIVE_MARKER.

    Everything before the first line containing the marker is the MCQ part;
    that line and everything after it is the subjective part. Every line is
    terminated with a newline. Without a marker the whole reply is MCQs.
    """
    lines = response.split("\n")
    split_at = next(
        (i for i, line in enumerate(lines) if SUBJECTIVE_MARKER in line),
        len(lines),
    )
    mcqs = "".join(f"{line}\n" for line in lines[:split_at])
    subjective = "".join(f"{line}\n" for line in lines[split_at:])
    return mcqs, subjective


# Generate MCQs and Subjective Questions Using Groq
def generate_questions(
    text: str,
    num_mcqs: int = 5,
    num_subjective: int = 2,
    difficulty_mcqs: str = "medium",
    difficulty_subjective: str = "medium",
    question_type: str = "mix",
) -> Tuple[str, str]:
    """Ask the Groq chat model for questions about *text*.

    Args:
        text: Source material the questions are generated from.
        num_mcqs: Requested number of MCQs, capped at MAX_MCQS.
        num_subjective: Requested number of subjective questions, capped at
            MAX_SUBJECTIVE.
        difficulty_mcqs: Key of DIFFICULTY_LEVELS; unknown keys fall back to
            the "medium" description.
        difficulty_subjective: Same as above, for subjective questions.
        question_type: Key of QUESTION_TYPE_MAP; unknown keys fall back to
            the "mix" instruction.

    Returns:
        ``(mcqs, subjective)``. If anything fails, the first element is
        ``"Error generating questions: <exception>"`` and the second is "".
    """
    try:
        num_mcqs = min(int(num_mcqs), MAX_MCQS)  # Limit MCQs to 60
        # Limit Subjective Questions to 20
        num_subjective = min(int(num_subjective), MAX_SUBJECTIVE)

        # Fall back to the full description rather than the bare key so the
        # prompt stays well-formed for unexpected values.
        mcq_level = DIFFICULTY_LEVELS.get(
            difficulty_mcqs, DIFFICULTY_LEVELS["medium"]
        )
        subjective_level = DIFFICULTY_LEVELS.get(
            difficulty_subjective, DIFFICULTY_LEVELS["medium"]
        )
        type_instruction = QUESTION_TYPE_MAP.get(
            question_type, QUESTION_TYPE_MAP["mix"]
        )

        prompt = (
            f"Generate {num_mcqs} multiple choice questions and "
            f"{num_subjective} subjective questions from the following "
            f"text: {text}. Include the correct answers for each question. "
            f"The questions should be {mcq_level} for MCQs and "
            f"{subjective_level} for Subjective questions. "
            f"{type_instruction}"
        )

        # NOTE(review): confirm this model id is still served by Groq.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
        )
        response = chat_completion.choices[0].message.content.strip()

        # Split response into MCQs and Subjective questions
        return _split_sections(response)
    except Exception as e:
        return f"Error generating questions: {e}", ""


# Gradio Interface Function
def process_and_generate(
    file_paths: Optional[Sequence[str]],
    raw_text: Optional[str],
    num_mcqs,
    num_subjective,
    difficulty_mcqs: str,
    difficulty_subjective: str,
    question_type: str,
) -> Tuple[str, str]:
    """Combine uploaded-file text and raw text, then generate questions.

    Text extracted from the files comes first, followed by *raw_text* on a
    new line. Returns the ``(mcqs, subjective)`` pair shown in the two output
    boxes, or the same explanatory message in both when there is no input or
    generation raises.
    """
    parts = []

    # Extract text from uploaded files
    if file_paths:
        extracted_text = process_files(file_paths)
        if extracted_text.strip():  # Ensure extracted text is non-empty
            parts.append(extracted_text)

    # Add raw text if provided; a missing value is treated as empty.
    raw_text = raw_text or ""
    if raw_text.strip():
        parts.append(raw_text)

    # Check if there is any text to process
    if not parts:
        message = "No text provided to generate questions."
        return message, message

    combined_text = "\n".join(parts)

    # Generate questions from the combined text
    try:
        mcqs, subjective = generate_questions(
            combined_text,
            num_mcqs,
            num_subjective,
            difficulty_mcqs,
            difficulty_subjective,
            question_type,
        )
        return mcqs, subjective
    except Exception as e:
        message = f"Error generating questions: {e}"
        return message, message


# Gradio Inputs and Outputs
inputs = [
    gr.File(
        file_count="multiple",
        type="filepath",
        label="Upload Files (.pdf, .pptx, .docx)",
    ),
    gr.Textbox(
        lines=3,
        placeholder="Enter raw text here (Optional)...",
        label="Raw Text",
    ),
    gr.Slider(
        minimum=2,
        maximum=MAX_MCQS,
        value=5,
        step=1,
        label=f"Number of MCQs (Max {MAX_MCQS})",
    ),
    gr.Slider(
        minimum=2,
        maximum=MAX_SUBJECTIVE,
        value=2,
        step=1,
        label=f"Number of Subjective Questions (Max {MAX_SUBJECTIVE})",
    ),
    gr.Radio(
        ["easy", "medium", "hard"],
        label="Select Difficulty Level for MCQs",
        value="medium",
    ),
    gr.Radio(
        ["easy", "medium", "hard"],
        label="Select Difficulty Level for Subjective Questions",
        value="medium",
    ),
    gr.Radio(
        ["reason", "short", "long", "case study", "mix"],
        label="Select Type of Question",
        value="mix",
    ),
]

outputs = [
    gr.Textbox(label="Generated MCQs", lines=10),
    gr.Textbox(label="Generated Subjective Questions", lines=10),
]

# Gradio Interface
gr.Interface(
    fn=process_and_generate,
    inputs=inputs,
    outputs=outputs,
    title="MCQ & Subjective Question Generator",
    live=False,
).launch(share=True)