Spaces: DishaKushwah
Commit cac9aa5 (1 Parent(s): c11256e) · committed by DishaKushwah
Created using Colab

mcq_generator.ipynb  CHANGED  (+12 -80)
@@ -5,7 +5,7 @@
     "colab": {
       "provenance": [],
       "gpuType": "T4",
-      "authorship_tag": "
+      "authorship_tag": "ABX9TyNlLgN36uc2PRyXWiLUUS03",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -79,12 +79,7 @@
     "        self.nlp = None\n",
     "\n",
     "        # Load fill-mask pipeline for generating distractors\n",
-    "        self.fill_mask = pipeline(\n",
-    "            \"fill-mask\",\n",
-    "            model=\"roberta-large\",\n",
-    "            tokenizer=\"roberta-large\",\n",
-    "            device=0 if torch.cuda.is_available() else -1\n",
-    "        )\n",
+    "        self.fill_mask = pipeline(\"fill-mask\",model=\"roberta-large\",tokenizer=\"roberta-large\",device=0 if torch.cuda.is_available() else -1)\n",
     "\n",
     "        # Download NLTK data\n",
     "        try:\n",
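
The collapsed call keeps the same arguments as the multi-line version it replaces. For reference, a minimal standalone sketch of the same fill-mask setup, assuming transformers and torch are installed (the example sentence is made up):

import torch
from transformers import pipeline

# Same arguments as the notebook: RoBERTa-large on GPU 0 if available, else CPU.
fill_mask = pipeline(
    "fill-mask",
    model="roberta-large",
    tokenizer="roberta-large",
    device=0 if torch.cuda.is_available() else -1,
)

# RoBERTa's mask token is "<mask>"; each candidate dict carries "token_str" and "score".
for candidate in fill_mask("Leonardo da Vinci painted the <mask>.", top_k=3):
    print(candidate["token_str"], candidate["score"])
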
@@ -99,17 +94,11 @@
     "            return {\"entities\": [], \"noun_chunks\": [], \"sentences\": []}\n",
     "\n",
     "        doc = self.nlp(text)\n",
-    "\n",
     "        # Extract named entities\n",
     "        entities = []\n",
     "        for ent in doc.ents:\n",
     "            if ent.label_ in ['PERSON', 'ORG', 'GPE', 'DATE', 'EVENT', 'WORK_OF_ART', 'CARDINAL', 'ORDINAL']:\n",
-    "                entities.append({\n",
-    "                    'text': ent.text,\n",
-    "                    'label': ent.label_,\n",
-    "                    'start': ent.start_char,\n",
-    "                    'end': ent.end_char\n",
-    "                })\n",
+    "                entities.append({'text': ent.text,'label': ent.label_,'start': ent.start_char,'end': ent.end_char})\n",
     "\n",
     "        # Extract noun chunks\n",
     "        noun_chunks = [chunk.text for chunk in doc.noun_chunks if len(chunk.text.split()) <= 4]\n",
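
The flattened entities.append keeps the same four fields. A sketch of the extraction loop on its own, assuming spaCy with the en_core_web_sm model (the diff never shows which model self.nlp loads):

import spacy

nlp = spacy.load("en_core_web_sm")  # assumed model; any English pipeline with NER works
doc = nlp("Michelangelo painted the Sistine Chapel ceiling between 1508 and 1512.")

# Keep only the entity labels the notebook filters for.
wanted = ['PERSON', 'ORG', 'GPE', 'DATE', 'EVENT', 'WORK_OF_ART', 'CARDINAL', 'ORDINAL']
entities = [
    {'text': ent.text, 'label': ent.label_, 'start': ent.start_char, 'end': ent.end_char}
    for ent in doc.ents
    if ent.label_ in wanted
]
print(entities)
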
@@ -117,36 +106,17 @@
     "        # Extract sentences\n",
     "        sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.split()) > 5]\n",
     "\n",
-    "        return {\n",
-    "            \"entities\": entities,\n",
-    "            \"noun_chunks\": noun_chunks,\n",
-    "            \"sentences\": sentences\n",
-    "        }\n",
+    "        return {\"entities\": entities,\"noun_chunks\": noun_chunks,\"sentences\": sentences}\n",
     "\n",
     "    def generate_question_from_context(self, context: str, answer_text: str) -> str:\n",
     "        \"\"\"Generate a question given context and answer.\"\"\"\n",
     "        # Highlight the answer in the context for T5\n",
     "        highlighted_context = context.replace(answer_text, f\"<hl>{answer_text}<hl>\")\n",
     "        input_text = f\"generate question: {highlighted_context}\"\n",
-    "\n",
-    "        inputs = self.qg_tokenizer.encode_plus(\n",
-    "            input_text,\n",
-    "            max_length=512,\n",
-    "            truncation=True,\n",
-    "            padding=True,\n",
-    "            return_tensors=\"pt\"\n",
-    "        ).to(self.device)\n",
+    "        inputs = self.qg_tokenizer.encode_plus(input_text,max_length=512,truncation=True,padding=True,return_tensors=\"pt\").to(self.device)\n",
     "\n",
     "        with torch.no_grad():\n",
-    "            outputs = self.qg_model.generate(\n",
-    "                inputs[\"input_ids\"],\n",
-    "                attention_mask=inputs[\"attention_mask\"],\n",
-    "                max_length=64,\n",
-    "                num_beams=4,\n",
-    "                temperature=0.8,\n",
-    "                do_sample=True,\n",
-    "                early_stopping=True\n",
-    "            )\n",
+    "            outputs = self.qg_model.generate(inputs[\"input_ids\"],attention_mask=inputs[\"attention_mask\"],max_length=64,num_beams=4,temperature=0.8,do_sample=True,early_stopping=True)\n",
     "\n",
     "        question = self.qg_tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
     "        return question\n",
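
The encode/generate pair behaves identically collapsed or expanded. A self-contained sketch of the highlight-and-generate step, assuming a T5-style question-generation checkpoint such as valhalla/t5-base-qg-hl (an assumption; the diff never names the model behind qg_model/qg_tokenizer):

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "valhalla/t5-base-qg-hl"  # assumed checkpoint; substitute the notebook's actual one
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

context = "Leonardo da Vinci was born in 1452."
answer = "1452"
# Wrap the answer span in <hl> markers, as the notebook does, then add the task prefix.
text = "generate question: " + context.replace(answer, f"<hl>{answer}<hl>")

inputs = tokenizer(text, max_length=512, truncation=True, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=64,
        num_beams=4,
        early_stopping=True,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Note the notebook combines num_beams=4 with do_sample=True and temperature=0.8 (beam-sample decoding); the sketch uses plain beam search for deterministic output.
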
@@ -186,9 +156,7 @@
     "\n",
     "        # Find similar entities\n",
     "        for ent in doc.ents:\n",
-    "            if (ent.label_ == answer_label and\n",
-    "                ent.text != correct_answer and\n",
-    "                ent.text not in distractors):\n",
+    "            if (ent.label_ == answer_label and ent.text != correct_answer and ent.text not in distractors):\n",
     "                distractors.append(ent.text)\n",
     "                if len(distractors) >= num_distractors:\n",
     "                    break\n",
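
The joined condition is equivalent to the three-line version. A sketch of the same-label distractor pick, again assuming an en_core_web_sm pipeline (text and answer are made up):

import spacy

nlp = spacy.load("en_core_web_sm")  # assumed model
doc = nlp("Raphael, Titian, and Donatello were also Renaissance artists.")

correct_answer, answer_label = "Michelangelo", "PERSON"
num_distractors = 3
distractors = []
# Collect other entities with the same label as the answer, skipping duplicates.
for ent in doc.ents:
    if (ent.label_ == answer_label and ent.text != correct_answer and ent.text not in distractors):
        distractors.append(ent.text)
        if len(distractors) >= num_distractors:
            break
print(distractors)
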
@@ -226,7 +194,6 @@
     "            if d.lower() not in seen and d.lower() != correct_answer.lower():\n",
     "                seen.add(d.lower())\n",
     "                unique_distractors.append(d)\n",
-    "\n",
     "        return unique_distractors[:num_distractors]\n",
     "\n",
     "    def validate_mcq_quality(self, question: str, correct_answer: str, distractors: List[str], context: str) -> Dict:\n",
@@ -242,7 +209,6 @@
     "            correct_embedding = self.sentence_model.encode([correct_answer])\n",
     "            predicted_embedding = self.sentence_model.encode([predicted_answer])\n",
     "            similarity = cosine_similarity(correct_embedding, predicted_embedding)[0][0]\n",
-    "\n",
     "            is_answerable = similarity > similarity_threshold or correct_answer.lower() in predicted_answer.lower()\n",
     "\n",
     "        except:\n",
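
The answerability test is unchanged: embed both strings, take cosine similarity, and accept either a high score or a literal substring match. A standalone sketch, assuming sentence-transformers with the all-MiniLM-L6-v2 model and a 0.5 threshold (both assumptions; the notebook's sentence_model and similarity_threshold are defined outside this hunk):

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

sentence_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model
similarity_threshold = 0.5                                # assumed value

correct_answer = "Johannes Gutenberg"
predicted_answer = "Gutenberg"  # e.g. what a QA model returned

correct_embedding = sentence_model.encode([correct_answer])
predicted_embedding = sentence_model.encode([predicted_answer])
similarity = cosine_similarity(correct_embedding, predicted_embedding)[0][0]

# Same rule as the notebook: similar enough, or a substring hit.
is_answerable = similarity > similarity_threshold or correct_answer.lower() in predicted_answer.lower()
print(similarity, is_answerable)
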
@@ -265,13 +231,7 @@
     "            distractor_quality = \"poor\"\n",
     "            avg_distractor_similarity = 0.0\n",
     "\n",
-    "        return {\n",
-    "            \"is_answerable\": is_answerable,\n",
-    "            \"confidence\": confidence,\n",
-    "            \"answer_similarity\": similarity,\n",
-    "            \"distractor_quality\": distractor_quality,\n",
-    "            \"avg_distractor_similarity\": avg_distractor_similarity\n",
-    "        }\n",
+    "        return {\"is_answerable\": is_answerable,\"confidence\": confidence,\"answer_similarity\": similarity,\"distractor_quality\": distractor_quality,\"avg_distractor_similarity\": avg_distractor_similarity }\n",
     "\n",
     "    def generate_mcq(self, context: str, num_questions: int = 5) -> List[Dict]:\n",
     "        \"\"\"Generate multiple choice questions from context.\"\"\"\n",
@@ -304,12 +264,7 @@
     "\n",
     "            mcq = {\n",
     "                \"question\": question,\n",
-    "                \"options\": {\n",
-    "                    \"A\": options[0],\n",
-    "                    \"B\": options[1],\n",
-    "                    \"C\": options[2] if len(options) > 2 else \"None of the above\",\n",
-    "                    \"D\": options[3] if len(options) > 3 else \"All of the above\"\n",
-    "                },\n",
+    "                \"options\": {\"A\": options[0],\"B\": options[1],\"C\": options[2] if len(options) > 2 else \"None of the above\",\"D\": options[3] if len(options) > 3 else \"All of the above\"},\n",
     "                \"correct_answer\": correct_option,\n",
     "                \"correct_text\": correct_answer,\n",
     "                \"entity_type\": entity[\"label\"],\n",
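
This one-liner (repeated in the noun-chunk branch below) pads missing distractors with generic fillers. A sketch of how the A-D mapping behaves when fewer than three distractors survive filtering; the shuffle is a hypothetical stand-in for however the notebook orders `options`:

import random

correct_answer = "Leonardo da Vinci"        # hypothetical
distractors = ["Michelangelo", "Raphael"]   # hypothetical: only two distractors found

options = [correct_answer] + distractors
random.shuffle(options)  # assumed; the notebook builds `options` before this point

mcq_options = {
    "A": options[0],
    "B": options[1],
    "C": options[2] if len(options) > 2 else "None of the above",
    "D": options[3] if len(options) > 3 else "All of the above",
}
correct_option = next(k for k, v in mcq_options.items() if v == correct_answer)
print(mcq_options, correct_option)
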
@@ -337,12 +292,7 @@
     "\n",
     "            mcq = {\n",
     "                \"question\": question,\n",
-    "                \"options\": {\n",
-    "                    \"A\": options[0],\n",
-    "                    \"B\": options[1],\n",
-    "                    \"C\": options[2] if len(options) > 2 else \"None of the above\",\n",
-    "                    \"D\": options[3] if len(options) > 3 else \"All of the above\"\n",
-    "                },\n",
+    "                \"options\": {\"A\": options[0],\"B\": options[1],\"C\": options[2] if len(options) > 2 else \"None of the above\",\"D\": options[3] if len(options) > 3 else \"All of the above\"},\n",
     "                \"correct_answer\": correct_option,\n",
     "                \"correct_text\": chunk,\n",
     "                \"entity_type\": \"NOUN_CHUNK\",\n",
@@ -358,36 +308,18 @@
     "def main():\n",
     "    \"\"\"Main function to demonstrate the MCQ generator.\"\"\"\n",
     "    generator = MultipleChoiceQuestionGenerator()\n",
-    "\n",
-    "    # Sample context\n",
-    "    sample_context = \"\"\"\n",
-    "    The Renaissance was a period of cultural, artistic, political and economic rebirth following the Middle Ages.\n",
-    "    It began in Italy in the 14th century and spread throughout Europe. Leonardo da Vinci, born in 1452, was one\n",
-    "    of the most famous Renaissance artists and inventors. He created masterpieces like the Mona Lisa and The Last Supper.\n",
-    "    Michelangelo, another renowned artist, painted the ceiling of the Sistine Chapel between 1508 and 1512.\n",
-    "    The Renaissance emphasized humanism, scientific inquiry, and artistic innovation. The printing press,\n",
-    "    invented by Johannes Gutenberg around 1440, helped spread Renaissance ideas across Europe.\n",
-    "    This period lasted approximately 300 years, from the 14th to the 17th century.\n",
-    "    \"\"\"\n",
-    "\n",
     "    print(\"Multiple Choice Question Generator\")\n",
     "\n",
     "    # Get user input\n",
-    "    user_context = input(\"Enter your context
-    "    if not user_context:\n",
-    "        user_context = sample_context\n",
-    "        print(\"Using sample context about the Renaissance...\")\n",
-    "\n",
+    "    user_context = input(\"Enter your context: \").strip()\n",
     "    try:\n",
     "        num_questions = int(input(\"Number of MCQs to generate (default 5): \") or \"5\")\n",
     "    except ValueError:\n",
     "        num_questions = 5\n",
-    "\n",
     "    print(f\"\\nGenerating {num_questions} multiple choice questions...\")\n",
     "\n",
     "    # Generate MCQs\n",
     "    mcqs = generator.generate_mcq(user_context, num_questions)\n",
-    "\n",
     "    # Display results\n",
     "    if mcqs:\n",
     "        for i, mcq in enumerate(mcqs, 1):\n",
@@ -413,7 +345,7 @@
      "id": "o1ic84jCGc-u",
      "outputId": "82f5601f-0e8f-4ca7-d9a4-b514a58df793"
     },
-    "execution_count":
+    "execution_count": null,
     "outputs": [
      {
       "output_type": "stream",