namberino committed
Commit d405869 · Parent(s): f69f5bd
Revert

Files changed:
- enhanced_rag_mcq.py +113 -21
- fastapi_app.py +20 -7
- requirements.txt +0 -0
enhanced_rag_mcq.py
CHANGED

@@ -11,6 +11,7 @@ import os
 import json
 import time
 import torch
+import re
 from typing import List, Dict, Any, Optional, Tuple
 from dataclasses import dataclass, asdict
 from pathlib import Path

@@ -28,7 +29,6 @@ from langchain_core.prompts import PromptTemplate
 from langchain_community.vectorstores import FAISS
 
 from langchain_core.documents import Document
-from unsloth import FastLanguageModel
 
 # Transformers imports
 from transformers import (

@@ -338,7 +338,7 @@ class EnhancedRAGMCQGenerator:
         """Get default configuration"""
         return {
             "embedding_model": "bkai-foundation-models/vietnamese-bi-encoder",
-            "llm_model": "
+            "llm_model": "Qwen/Qwen2.5-3B-Instruct", # 7B, 1.5B
             "chunk_size": 500,
             "chunk_overlap": 50,
             "retrieval_k": 3,

@@ -383,10 +383,9 @@ class EnhancedRAGMCQGenerator:
         # Vietnamese typically has ~0.75 tokens per character
         return int(len(text) * 0.75)
 
-
+    #? Parse Json String
     def _extract_json_from_response(self, response: str) -> dict:
         """Extract JSON from LLM response with multiple fallback strategies"""
-        import re
 
         # Strategy 1: Clean response of prompt repetition
         clean_response = response

@@ -474,16 +473,16 @@ class EnhancedRAGMCQGenerator:
             bnb_4bit_quant_type="nf4"
         )
 
-        model
+        model = AutoModelForCausalLM.from_pretrained(
             self.config["llm_model"],
             quantization_config=bnb_config,
             low_cpu_mem_usage=True,
-            device_map="
+            device_map="cuda", # Use CUDA if available
             token=hf_token
         )
 
-
-        tokenizer.pad_token = tokenizer.eos_token
+        tokenizer = AutoTokenizer.from_pretrained(self.config["llm_model"])
+        # tokenizer.pad_token = tokenizer.eos_token
 
         model_pipeline = pipeline(
             "text-generation",
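Note: this hunk swaps the Unsloth loading path for a plain Transformers 4-bit load. A minimal self-contained sketch of that path follows. Only bnb_4bit_quant_type="nf4", low_cpu_mem_usage=True, device_map="cuda" and the AutoTokenizer line are visible in the diff; the remaining BitsAndBytesConfig fields, the generation budget, and the token handling are assumptions.

# Minimal sketch, assuming the parts of the config not shown in the hunk.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

model_id = "Qwen/Qwen2.5-3B-Instruct"  # value of config["llm_model"] in this commit

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # assumed; needed for 4-bit weights
    bnb_4bit_quant_type="nf4",             # shown in the hunk
    bnb_4bit_compute_dtype=torch.float16,  # assumed compute dtype
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    device_map="cuda",     # as in the hunk; "auto" also works if CUDA may be absent
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

model_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,    # assumed generation budget
)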
@@ -658,6 +657,43 @@ class EnhancedRAGMCQGenerator:
             print(f"Raw response: {response[:500]}...")
             raise ValueError(f"Failed to parse LLM response: {e}")
 
+    def _batch_invoke(self, prompts: List[str]) -> List[str]:
+        if not prompts:
+            return []
+
+        # Try to use transformers pipeline (batch mode)
+        pl = getattr(self.llm, "pipeline", None)
+        if pl is not None:
+            try:
+                # Call the pipeline with a list. Transformers will return a list of generation outputs.
+                raw_outputs = pl(prompts)
+
+                responses = []
+                for out in raw_outputs:
+                    # The pipeline may return either a dict (single result) or a list of dicts (if return_full_text or num_return_sequences was set)
+                    if isinstance(out, list) and out:
+                        text = out[0].get("generated_text", "")
+                    elif isinstance(out, dict):
+                        text = out.get("generated_text", "")
+                    else:
+                        # fallback: coerce to string
+                        text = str(out)
+                    responses.append(text)
+
+                if len(responses) == len(prompts):
+                    return responses
+                else:
+                    print("⚠️ Batch pipeline returned unexpected shape — falling back")
+            except Exception as e:
+                # Batch mode failed. Fall back to sequential invocations.
+                print(f"⚠️ Batch invoke failed: {e}. Falling back to sequential.")
+
+        # Sequential invocation to preserve behavior
+        results = []
+        for p in prompts:
+            results.append(self.llm.invoke(p))
+        return results
+
     def generate_batch(self,
                        topics: List[str],
                        question_per_topic: int = 5,
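The shape handling in _batch_invoke exists because a Transformers text-generation pipeline returns a list of dicts for a single prompt but a list of lists of dicts when called with a list of prompts. A tiny sketch of those shapes, using an arbitrary small model (distilgpt2) purely to keep it runnable; the project's model comes from config["llm_model"]:

# Sketch of the output shapes _batch_invoke normalizes.
from transformers import pipeline

pl = pipeline("text-generation", model="distilgpt2")  # distilgpt2 is just a small stand-in

single = pl("Hello", max_new_tokens=8)               # -> [{"generated_text": ...}]
batch = pl(["Hello", "Hi there"], max_new_tokens=8)  # -> one inner list of dicts per prompt

for out in batch:
    entry = out[0] if isinstance(out, list) else out  # same normalization as _batch_invoke
    print(entry["generated_text"][:40])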
@@ -671,8 +707,9 @@ class EnhancedRAGMCQGenerator:
         if question_types is None:
             question_types = [QuestionType.DEFINITION, QuestionType.APPLICATION]
 
-        mcqs = []
         total_questions = len(topics) * question_per_topic
+        prompt_metadatas = [] # stores tuples (topic, difficulty, question_type)
+        formatted_prompts = []
 
         print(f"🎯 Generating {total_questions} MCQs...")
 

@@ -680,21 +717,76 @@ class EnhancedRAGMCQGenerator:
             print(f"📝 Processing topic {i+1}/{len(topics)}: {topic}")
 
             for j in range(question_per_topic):
-
-
-
-
-
-
-
+                difficulty = difficulties[j % len(difficulties)]
+                question_type = question_types[j % len(question_types)]
+
+                query = topic
+
+                # retrieve context once per prompt
+                contexts = self.retriever.retrieve_diverse_contexts(query, k=self.config.get("k", 5)) if hasattr(self, "retriever") else []
+                context_text = "\n\n".join([d.page_content for d in contexts]) if contexts else topic
+
+                # Build prompt with PromptTemplateManager
+                prompt_template = self.template_manager.get_template(question_type)
+                prompt_input = {
+                    "context": context_text,
+                    "topic": topic,
+                    "difficulty": difficulty.value if hasattr(difficulty, 'value') else str(difficulty),
+                    "question_type": question_type.value if hasattr(question_type, 'value') else str(question_type)
+                }
+                formatted = prompt_template.format(**prompt_input)
+
+                # Length check and fallback
+                is_safe, token_count = self.check_prompt_length(formatted)
+                if not is_safe:
+                    truncated = formatted[: self.config.get("max_prompt_chars", 2000)]
+                    formatted = truncated
+
+                prompt_metadatas.append((topic, difficulty, question_type))
+                formatted_prompts.append(formatted)
+
+        total = len(formatted_prompts)
+        if total == 0:
+            return []
 
-
-
+        print(f"📦 Sending {total} prompts to the LLM in batch mode (if supported)")
+        start_t = time.time()
+        raw_responses = self._batch_invoke(formatted_prompts)
+        elapsed = time.time() - start_t
+        print(f"⏱ LLM batch time: {elapsed:.2f}s for {total} prompts")
 
-
-
+        # Parse raw responses back into MCQQuestion objects
+        mcqs = []
+        for meta, response in zip(prompt_metadatas, raw_responses):
+            topic, difficulty, question_type = meta
+            try:
+                response_data = self._extract_json_from_response(response)
+
+                # Reconstruct MCQQuestion
+                options = []
+                for label, text in response_data["options"].items():
+                    is_correct = label == response_data["correct_answer"]
+                    options.append(self.MCQOption(label=label, text=text, is_correct=is_correct))
+
+                mcq = self.MCQQuestion(
+                    question_id=response_data.get("id", None),
+                    topic=topic,
+                    question_text=response_data["question"],
+                    options=options,
+                    explanation=response_data.get("explanation", ""),
+                    difficulty=(difficulty if hasattr(difficulty, 'name') else difficulty),
+                    question_type=(question_type if hasattr(question_type, 'name') else question_type),
+                    confidence_score=response_data.get("confidence_score", 0.0)
+                )
+
+                if hasattr(self, 'validator'):
+                    mcq = self.validator.calculate_quality_score(mcq)
+
+                mcqs.append(mcq)
+            except Exception as e:
+                print(f"❌ Failed parsing response for topic={topic}: {e}")
 
-        print(f"🎉 Generated {len(mcqs)}/{
+        print(f"🎉 Generated {len(mcqs)}/{total} MCQs successfully (batched)")
         return mcqs
 
     def export_mcqs(self, mcqs: List[MCQQuestion], output_path: str):
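A hedged usage sketch of the batched generate_batch follows. Only the keyword arguments are taken from this commit (they mirror the fastapi_app.py call below); the constructor call, the build_knowledge_base step, the DifficultyLevel.EASY member, and the import location of the enums are hypothetical placeholders.

# Hedged sketch: calling the batched generate_batch directly.
from enhanced_rag_mcq import EnhancedRAGMCQGenerator, DifficultyLevel, QuestionType

generator = EnhancedRAGMCQGenerator()          # hypothetical: default config
generator.build_knowledge_base("notes.pdf")    # hypothetical indexing step

mcqs = generator.generate_batch(
    topics=["Mạng máy tính"],
    question_per_topic=3,
    difficulties=[DifficultyLevel.EASY],       # assumed enum member
    question_types=[QuestionType.DEFINITION],  # QuestionType.DEFINITION appears in the diff
)
print(f"Generated {len(mcqs)} MCQs")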
fastapi_app.py
CHANGED

@@ -9,9 +9,8 @@ from fastapi import FastAPI, Form, HTTPException, UploadFile, File
 from contextlib import asynccontextmanager
 
 
-
 generator: Optional[EnhancedRAGMCQGenerator] = None
-tmp_folder = "./tmp"
+tmp_folder = "./tmp" #? make sure folder upload here
 if not os.path.exists(tmp_folder):
     os.makedirs(tmp_folder)
 

@@ -39,7 +38,6 @@ class GenerateResponse(BaseModel):
     avg_confidence: float
     generation_time: float
 
-
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     global generator

@@ -59,14 +57,14 @@ app = FastAPI(
     lifespan=lifespan
 )
 
-#? cmd:
+#? cmd: fastapi run app.py
 @app.post("/generate/")
 async def mcq_gen(
     file: UploadFile = File(...),
     topics: str = Form(...),
     n_questions: str = Form(...),
-    difficulty:
-    qtype:
+    difficulty: str = Form(...),
+    qtype: str = Form(...)
 ):
     if not generator:
         raise HTTPException(status_code=500, detail="Generator not initialized")

@@ -75,6 +73,19 @@ async def mcq_gen(
     if not topic_list:
         raise HTTPException(status_code=400, detail="At least one topic must be provided")
 
+    # Validate and convert enum values
+    try:
+        difficulty_enum = DifficultyLevel(difficulty.lower())
+    except ValueError:
+        valid_difficulties = [d.value for d in DifficultyLevel]
+        raise HTTPException(status_code=400, detail=f"Invalid difficulty. Must be one of: {valid_difficulties}")
+
+    try:
+        qtype_enum = QuestionType(qtype.lower())
+    except ValueError:
+        valid_types = [q.value for q in QuestionType]
+        raise HTTPException(status_code=400, detail=f"Invalid question type. Must be one of: {valid_types}")
+
     # Save uploaded PDF to temporary folder
     filename = file.filename if file.filename else "uploaded_file"
     file_path = os.path.join(tmp_folder, filename)

@@ -93,7 +104,9 @@ async def mcq_gen(
     try:
         mcqs = generator.generate_batch(
             topics=topic_list,
-            question_per_topic=int(n_questions)
+            question_per_topic=int(n_questions),
+            difficulties=[difficulty_enum],
+            question_types=[qtype_enum]
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
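A minimal client sketch for the updated /generate/ endpoint. The form field names come from the diff; the concrete difficulty and qtype values are assumptions (the valid DifficultyLevel and QuestionType values are validated server-side but not listed here), the comma-separated topics format is assumed, and localhost:8000 assumes the default port of `fastapi run`.

# Hedged client sketch for POST /generate/.
import requests

with open("lecture_notes.pdf", "rb") as f:  # hypothetical input PDF
    resp = requests.post(
        "http://localhost:8000/generate/",
        files={"file": ("lecture_notes.pdf", f, "application/pdf")},
        data={
            "topics": "Mạng máy tính, TCP/IP",  # assumed comma-separated topics
            "n_questions": "3",
            "difficulty": "easy",        # must be a valid DifficultyLevel value (assumed)
            "qtype": "definition",       # must be a valid QuestionType value (assumed)
        },
        timeout=600,
    )

print(resp.status_code)
print(resp.json())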
requirements.txt
CHANGED

Binary files a/requirements.txt and b/requirements.txt differ