namberino committed
Commit 53e5542 · Parent: d0a9bfe

Testing old version

Files changed (2):
  1. enhanced_rag_mcq.py +21 -113
  2. fastapi_app.py +7 -20
enhanced_rag_mcq.py CHANGED
@@ -11,7 +11,6 @@ import os
 import json
 import time
 import torch
-import re
 from typing import List, Dict, Any, Optional, Tuple
 from dataclasses import dataclass, asdict
 from pathlib import Path
@@ -29,6 +28,7 @@ from langchain_core.prompts import PromptTemplate
 from langchain_community.vectorstores import FAISS

 from langchain_core.documents import Document
+from unsloth import FastLanguageModel

 # Transformers imports
 from transformers import (
@@ -338,7 +338,7 @@ class EnhancedRAGMCQGenerator:
         """Get default configuration"""
         return {
             "embedding_model": "bkai-foundation-models/vietnamese-bi-encoder",
-            "llm_model": "Qwen/Qwen2.5-3B-Instruct",  # 7B, 1.5B
+            "llm_model": "unsloth/Qwen2.5-3B",  # 7B, 1.5B
             "chunk_size": 500,
             "chunk_overlap": 50,
             "retrieval_k": 3,
@@ -383,9 +383,10 @@ class EnhancedRAGMCQGenerator:
         # Vietnamese typically has ~0.75 tokens per character
         return int(len(text) * 0.75)

-    #? Parse Json String
+    #? Parse Json String
     def _extract_json_from_response(self, response: str) -> dict:
         """Extract JSON from LLM response with multiple fallback strategies"""
+        import re

         # Strategy 1: Clean response of prompt repetition
         clean_response = response
@@ -473,16 +474,16 @@ class EnhancedRAGMCQGenerator:
             bnb_4bit_quant_type="nf4"
         )

-        model = AutoModelForCausalLM.from_pretrained(
+        model, tokenizer = FastLanguageModel.from_pretrained(
             self.config["llm_model"],
             quantization_config=bnb_config,
             low_cpu_mem_usage=True,
-            device_map="cuda",  # Use CUDA if available
+            device_map="auto",
             token=hf_token
         )

-        tokenizer = AutoTokenizer.from_pretrained(self.config["llm_model"])
-        # tokenizer.pad_token = tokenizer.eos_token
+        # tokenizer = AutoTokenizer.from_pretrained(self.config["llm_model"])
+        tokenizer.pad_token = tokenizer.eos_token

         model_pipeline = pipeline(
             "text-generation",
@@ -657,43 +658,6 @@ class EnhancedRAGMCQGenerator:
             print(f"Raw response: {response[:500]}...")
             raise ValueError(f"Failed to parse LLM response: {e}")

-    def _batch_invoke(self, prompts: List[str]) -> List[str]:
-        if not prompts:
-            return []
-
-        # Try to use transformers pipeline (batch mode)
-        pl = getattr(self.llm, "pipeline", None)
-        if pl is not None:
-            try:
-                # Call the pipeline with a list. Transformers will return a list of generation outputs.
-                raw_outputs = pl(prompts)
-
-                responses = []
-                for out in raw_outputs:
-                    # The pipeline may return either a dict (single result) or a list of dicts (if return_full_text or num_return_sequences was set)
-                    if isinstance(out, list) and out:
-                        text = out[0].get("generated_text", "")
-                    elif isinstance(out, dict):
-                        text = out.get("generated_text", "")
-                    else:
-                        # fallback: coerce to string
-                        text = str(out)
-                    responses.append(text)
-
-                if len(responses) == len(prompts):
-                    return responses
-                else:
-                    print("⚠️ Batch pipeline returned unexpected shape — falling back")
-            except Exception as e:
-                # Batch mode failed. Fall back to sequential invocations.
-                print(f"⚠️ Batch invoke failed: {e}. Falling back to sequential.")
-
-        # Sequential invocation to preserve behavior
-        results = []
-        for p in prompts:
-            results.append(self.llm.invoke(p))
-        return results
-
     def generate_batch(self,
                        topics: List[str],
                        question_per_topic: int = 5,
@@ -707,9 +671,8 @@
         if question_types is None:
             question_types = [QuestionType.DEFINITION, QuestionType.APPLICATION]

+        mcqs = []
         total_questions = len(topics) * question_per_topic
-        prompt_metadatas = []  # stores tuples (topic, difficulty, question_type)
-        formatted_prompts = []

         print(f"🎯 Generating {total_questions} MCQs...")

@@ -717,76 +680,21 @@
             print(f"📝 Processing topic {i+1}/{len(topics)}: {topic}")

             for j in range(question_per_topic):
-                difficulty = difficulties[j % len(difficulties)]
-                question_type = question_types[j % len(question_types)]
-
-                query = topic
-
-                # retrieve context once per prompt
-                contexts = self.retriever.retrieve_diverse_contexts(query, k=self.config.get("k", 5)) if hasattr(self, "retriever") else []
-                context_text = "\n\n".join([d.page_content for d in contexts]) if contexts else topic
-
-                # Build prompt with PromptTemplateManager
-                prompt_template = self.template_manager.get_template(question_type)
-                prompt_input = {
-                    "context": context_text,
-                    "topic": topic,
-                    "difficulty": difficulty.value if hasattr(difficulty, 'value') else str(difficulty),
-                    "question_type": question_type.value if hasattr(question_type, 'value') else str(question_type)
-                }
-                formatted = prompt_template.format(**prompt_input)
-
-                # Length check and fallback
-                is_safe, token_count = self.check_prompt_length(formatted)
-                if not is_safe:
-                    truncated = formatted[: self.config.get("max_prompt_chars", 2000)]
-                    formatted = truncated
-
-                prompt_metadatas.append((topic, difficulty, question_type))
-                formatted_prompts.append(formatted)
-
-        total = len(formatted_prompts)
-        if total == 0:
-            return []
+                try:
+                    # Cycle through difficulties and question types
+                    difficulty = difficulties[j % len(difficulties)]
+                    question_type = question_types[j % len(question_types)]

-        print(f"📦 Sending {total} prompts to the LLM in batch mode (if supported)")
-        start_t = time.time()
-        raw_responses = self._batch_invoke(formatted_prompts)
-        elapsed = time.time() - start_t
-        print(f"⏱ LLM batch time: {elapsed:.2f}s for {total} prompts")
+                    mcq = self.generate_mcq(topic, difficulty, question_type)
+                    mcqs.append(mcq)

-        # Parse raw responses back into MCQQuestion objects
-        mcqs = []
-        for meta, response in zip(prompt_metadatas, raw_responses):
-            topic, difficulty, question_type = meta
-            try:
-                response_data = self._extract_json_from_response(response)
-
-                # Reconstruct MCQQuestion
-                options = []
-                for label, text in response_data["options"].items():
-                    is_correct = label == response_data["correct_answer"]
-                    options.append(self.MCQOption(label=label, text=text, is_correct=is_correct))
-
-                mcq = self.MCQQuestion(
-                    question_id=response_data.get("id", None),
-                    topic=topic,
-                    question_text=response_data["question"],
-                    options=options,
-                    explanation=response_data.get("explanation", ""),
-                    difficulty=(difficulty if hasattr(difficulty, 'name') else difficulty),
-                    question_type=(question_type if hasattr(question_type, 'name') else question_type),
-                    confidence_score=response_data.get("confidence_score", 0.0)
-                )
-
-                if hasattr(self, 'validator'):
-                    mcq = self.validator.calculate_quality_score(mcq)
-
-                mcqs.append(mcq)
-            except Exception as e:
-                print(f"❌ Failed parsing response for topic={topic}: {e}")
+                    print(f" ✅ Generated question {j+1}/{question_per_topic} "
+                          f"(Quality: {mcq.confidence_score:.1f})")
+
+                except Exception as e:
+                    print(f" ❌ Failed to generate question {j+1}: {e}")

-        print(f"🎉 Generated {len(mcqs)}/{total} MCQs successfully (batched)")
+        print(f"🎉 Generated {len(mcqs)}/{total_questions} MCQs successfully")
         return mcqs

     def export_mcqs(self, mcqs: List[MCQQuestion], output_path: str):
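
Note on the loader swap in this file: unsloth's FastLanguageModel.from_pretrained returns the model and its tokenizer together, which is why the separate AutoTokenizer call is now commented out and the pad token is set on the returned tokenizer. Below is a minimal sketch of the same wiring, not the repo's exact code; max_seq_length and load_in_4bit here are assumptions standing in for the BitsAndBytesConfig used in the diff.

# Minimal sketch: load the unsloth model once and reuse it in a
# transformers text-generation pipeline, as the class above does.
from unsloth import FastLanguageModel
from transformers import pipeline

model, tokenizer = FastLanguageModel.from_pretrained(
    "unsloth/Qwen2.5-3B",   # default "llm_model" from the updated config
    max_seq_length=2048,    # assumed context length for this sketch
    load_in_4bit=True,      # assumed stand-in for the 4-bit BitsAndBytesConfig
)
tokenizer.pad_token = tokenizer.eos_token  # same padding fix as in the diff

llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(llm_pipeline("Xin chào", max_new_tokens=20)[0]["generated_text"])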
fastapi_app.py CHANGED
@@ -9,8 +9,9 @@ from fastapi import FastAPI, Form, HTTPException, UploadFile, File
 from contextlib import asynccontextmanager


+
 generator: Optional[EnhancedRAGMCQGenerator] = None
-tmp_folder = "./tmp"  #? make sure folder upload here
+tmp_folder = "./tmp"
 if not os.path.exists(tmp_folder):
     os.makedirs(tmp_folder)

@@ -38,6 +39,7 @@ class GenerateResponse(BaseModel):
     avg_confidence: float
     generation_time: float

+
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     global generator
@@ -57,14 +59,14 @@
     lifespan=lifespan
 )

-#? cmd: fastapi run app.py
+#? cmd: uvicorn app:app --reload --reload-exclude unsloth_compiled_cache
 @app.post("/generate/")
 async def mcq_gen(
     file: UploadFile = File(...),
     topics: str = Form(...),
     n_questions: str = Form(...),
-    difficulty: str = Form(...),
-    qtype: str = Form(...)
+    difficulty: DifficultyLevel = Form(...),
+    qtype: QuestionType = Form(...)
 ):
     if not generator:
         raise HTTPException(status_code=500, detail="Generator not initialized")
@@ -73,19 +75,6 @@ async def mcq_gen(
     if not topic_list:
         raise HTTPException(status_code=400, detail="At least one topic must be provided")

-    # Validate and convert enum values
-    try:
-        difficulty_enum = DifficultyLevel(difficulty.lower())
-    except ValueError:
-        valid_difficulties = [d.value for d in DifficultyLevel]
-        raise HTTPException(status_code=400, detail=f"Invalid difficulty. Must be one of: {valid_difficulties}")
-
-    try:
-        qtype_enum = QuestionType(qtype.lower())
-    except ValueError:
-        valid_types = [q.value for q in QuestionType]
-        raise HTTPException(status_code=400, detail=f"Invalid question type. Must be one of: {valid_types}")
-
     # Save uploaded PDF to temporary folder
     filename = file.filename if file.filename else "uploaded_file"
     file_path = os.path.join(tmp_folder, filename)
@@ -104,9 +93,7 @@
     try:
         mcqs = generator.generate_batch(
             topics=topic_list,
-            question_per_topic=int(n_questions),
-            difficulties=[difficulty_enum],
-            question_types=[qtype_enum]
+            question_per_topic=int(n_questions)
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
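
Note on the handler signature: typing the form fields as the DifficultyLevel and QuestionType enums lets FastAPI coerce and validate the incoming values itself, which is what makes the manual try/ValueError blocks removable; in this version the two fields are accepted but no longer forwarded to generate_batch. A minimal, self-contained sketch of the pattern follows, with illustrative enum members that are assumptions rather than the repo's actual values.

# Minimal sketch, assuming a str-backed Enum; the real DifficultyLevel lives in
# enhanced_rag_mcq.py and may define different members.
from enum import Enum
from fastapi import FastAPI, Form

class DifficultyLevel(str, Enum):
    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"

app = FastAPI()

@app.post("/demo/")
async def demo(difficulty: DifficultyLevel = Form(...)):
    # FastAPI rejects any value outside the enum with a 422 before this body runs
    return {"difficulty": difficulty.value}

A multipart/form-data request whose difficulty field is not one of the enum values gets a 422 validation error automatically, without any handler-level checks.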