File size: 9,496 Bytes
cd42151
 
a432d4a
 
 
 
94d357f
 
a432d4a
 
94d357f
a432d4a
94d357f
 
 
 
 
 
 
 
 
 
 
d23814c
94d357f
 
 
 
 
 
 
a432d4a
 
 
 
 
 
 
8e83a33
 
 
 
ab50cdb
 
8e83a33
483cbac
 
 
 
ab50cdb
 
8e83a33
ab50cdb
8e83a33
f6abac4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a432d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
fb9a6a1
 
 
 
 
 
 
a432d4a
 
 
 
f6abac4
 
 
 
 
a432d4a
cd42151
a432d4a
 
9eb7bd1
a432d4a
 
 
 
 
 
 
94d357f
a432d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94d357f
a432d4a
 
 
e1f9cb7
 
 
 
 
 
 
 
 
 
 
a432d4a
 
 
 
 
 
cd42151
a432d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
94d357f
 
 
 
 
 
 
 
a432d4a
94d357f
 
 
 
 
 
 
 
9eb7bd1
 
e23fa96
a432d4a
e23fa96
a432d4a
 
 
 
 
ed4682a
a432d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab50cdb
a432d4a
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# -*- coding: utf-8 -*-

import json
import os
import random
import gradio as gr
from google import genai
from google.genai import types

# Legacy Hugging Face / Groq Inference API setup, kept for reference only:
# from huggingface_hub import InferenceClient

# API_KEY = os.environ.get("GROQ_API_KEY")
# API_BASE_URL = os.environ.get("GROQ_API_BASE_URL")

# client = InferenceClient(
#     api_key=API_KEY,
#     base_url=API_BASE_URL
# )

# Google Gemini client; requires GOOGLE_API_KEY to be set in the environment.
API_KEY = os.environ.get("GOOGLE_API_KEY")
client = genai.Client(api_key=API_KEY)

def generate_content(model="gemini-2.5-pro-exp-03-25", sys_prompt=None, user_prompt=None):
    """Send a prompt to the Gemini API and return the response text.

    The system prompt is passed as a system instruction in the request
    config; the user prompt becomes the request contents.
    """
    config = types.GenerateContentConfig(system_instruction=sys_prompt)
    response = client.models.generate_content(
        model=model,
        config=config,
        contents=user_prompt,
    )
    return response.text

# Directory holding the pre-built question bank files.
output_dir = "./question_bank"

# Question-bank index, formatted as { "<grade>年級_<term>學期": [question file names] }.
# Use a context manager so the JSON file handle is closed
# (the original json.load(open(...)) leaked the handle).
with open(f"{output_dir}/question_bank_dict.json", "r") as _qb_file:
    question_bank_dict = json.load(_qb_file)

import re

def remove_think_tags(content):
    """Strip literal ``<think>`` / ``</think>`` tag markers from *content*.

    Note: only the tag markers themselves are removed — the text between
    them is kept, so any chain-of-thought reasoning remains visible in the
    output. (The original docstring incorrectly claimed the enclosed
    content was removed as well.)

    Args:
        content: model output that may contain think tags.

    Returns:
        The input with tag markers removed and surrounding whitespace
        stripped.
    """
    cleaned_content = content.replace('<think>', '').replace('</think>', '')
    return cleaned_content.strip()

def random_questions_with_limit(data, limit=20000):
    """Randomly pick questions from *data*, capping the total string length.

    The limit is clamped to a minimum of 5000 characters. The caller's
    list is NOT modified (the original shuffled *data* in place, mutating
    the caller's question bank).

    Args:
        data: list of question strings to sample from.
        limit: maximum combined character count of the selection.

    Returns:
        A 3-tuple of:
        - result_list: the selected questions, in random order
        - result_str: the selection joined with blank-line separators
        - count: how many questions were selected
    """
    # Never allow a context window smaller than 5000 characters.
    limit = max(limit, 5000)

    # Shuffle a copy so the caller's list is left untouched.
    shuffled = random.sample(data, len(data))

    result_list = []
    current_length = 0

    for item in shuffled:
        # Skip items that would exceed the limit, but keep scanning:
        # a later, shorter item may still fit.
        if current_length + len(item) <= limit:
            result_list.append(item)
            current_length += len(item)

    return result_list, "\n\n".join(result_list), len(result_list)

def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
    """
    Generate new math questions for a given grade and term.

    Existing question-bank markdown files are sampled as few-shot context
    and handed to the Gemini model, which writes fresh questions in a
    similar style (responses are in Traditional Chinese).

    Args:
        grade: school grade as a string, "1" through "6" (from the UI dropdown).
        term: "First" or "Second".
        qtype: question-type label, or "Unspecified" for a mix of types.
        num_questions: number of questions to request from the model.

    Returns:
        The model's generated questions and answers as markdown text.
    """

    # Map the UI's English choices onto the Chinese labels used by the bank.
    grades = {"1": "一", "2": "二", "3": "三", "4": "四", "5": "五", "6": "六"}
    terms = {"First": "上", "Second": "下"}

    # With no specific type requested, ask for a mix of all supported types.
    if qtype == "Unspecified":
        qtype = "Multiple-choice/Fill-in-the-blank/Short-answer/True-False"

    print(f"""
    Grade = {grade}
    Term = {term}
    QType = {qtype}
    NumQuestions = {num_questions}
    """)

    # Key into question_bank_dict, e.g. "一年級_上學期".
    grade_semester = f"{grades[grade]}年級_{terms[term]}學期"

    # Read every existing markdown file for this grade/term. A context
    # manager closes each handle (the original list comprehension opened
    # one file per document and never closed them).
    raw_questions = []
    for doc_path in question_bank_dict[grade_semester]:
        md_path = f"{output_dir}/md/{doc_path.replace('.pdf', '.md')}"
        if os.path.exists(md_path):
            with open(md_path) as md_file:
                raw_questions.append(md_file.read())

    print(f"Retrieved {len(raw_questions)} document(s)")

    # Randomly sample bank content, capped at ~10k characters of context.
    input_question_bank = random_questions_with_limit(raw_questions, 10000)

    # System message sets the generation contract; user message carries the
    # sampled bank plus per-request parameters. The model must produce new
    # questions in a similar style without copying the bank verbatim.
    messages = [
        {
            "role": "system",
            "content": """
            You are an advanced AI assistant designed to generate educational questions. Your task is to create new and diverse questions based on existing question banks. The new questions should be similar in difficulty, style, and structure to the input questions but must not directly replicate them. **All responses must be in Traditional Chinese.**

            Key Instructions:
            1. Maintain the educational purpose of the questions.
            2. Ensure diversity in phrasing, answer options, and scenarios.
            3. Adjust the difficulty level only if specified by the user.
            4. Support multiple question types, including:
            - Multiple-choice questions
            - Fill-in-the-blank
            - Short-answer questions
            - True/false questions
            5. Provide a detailed and accurate answer key for each generated question.
            6. When applicable, use creative but relevant contexts (e.g., real-life scenarios, stories, or analogies) to make questions engaging.

            Output Format:
            - Clearly distinguish the new question and the answer.
            - If multiple questions are generated, number them sequentially.
            - Ensure clarity and correctness in all generated content.
            - Respond only in **Traditional Chinese**."""
        },
        {
            "role": "user",
            "content": f"""
            I have the following question bank. Please follow these guidelines to generate new, diverse, and pedagogically appropriate questions:
            1. **Maintain strict alignment with original question formats** - For multiple choice questions provide 4 fully developed options; for true/false questions include both statement variations; for fill-in-the-blank precisely indicate blank locations with underscores/braces
            2. **Structure output with clear question components**:
               - Multiple Choice: [Stem] > [Options A-D] > [Correct Answer]
               - True/False: [Statement] > [Answer Rationale]
               - Fill-in-the-Blank: [Context with clearly marked blanks] > [Answer Key]
            3. Ensure cognitive consistency with original difficulty level and subject focus
            4. Include annotated answer explanations for each generated question
            5. **Organize output in a table format with columns: Question Type, Question Structure, Answer Details**

            Input Question Bank:
            {input_question_bank[1]}

            Requirements for the generated questions:
            - Number of new questions: {num_questions}
            - Include the following types: {qtype}

            Example Output Format:
            1. Question: [New Question 1]
            Answer: [Answer 1]

            2. Question: [New Question 2]
            Answer: [Answer 2]

            3. Question: [New Question 3]
            Answer: [Answer 3]
            """
        }
    ]

    # Call Gemini with the system/user prompt pair assembled above.
    response = generate_content(
        sys_prompt=messages[0]['content'],
        user_prompt=messages[1]['content']
    )
    print(response)

    return response

# Build the Gradio interface.
with gr.Blocks() as app:
    # Title / intro banner.
    gr.Markdown("""
    # 🤖🧮 AI Math Quiz Maker

    **Need some math questions? Let me help! Choose the grade, term, type of questions, and number of questions, and I'll generate them for you!**
    """)

    # Grade and term selectors, laid out side by side.
    with gr.Row():
        grade = gr.Dropdown(label="🎓 Select Grade", choices=["1", "2", "3", "4", "5", "6"], value="6")
        term = gr.Radio(label="📆 Select Term", choices=["First", "Second"], value="First")

    # Question type and question count controls.
    qtype = gr.Radio(label="✏️ Select Question Type", choices=["Multiple-choice", "Fill-in-the-blank", "Short-answer", "True-False", "Unspecified"], value="Multiple-choice")
    num_questions = gr.Number(label="🔢 Number of Questions", value=10, precision=0, maximum=20, minimum=1)

    # Generate button and markdown output area.
    generate_button = gr.Button("🚀 Generate Questions")
    output = gr.Markdown("📝 Your questions will appear here!", label="📝 Your questions")

    # On click, run generate_math_questions and render the result below.
    generate_button.click(generate_math_questions, inputs=[grade, term, qtype, num_questions], outputs=output)

# Start the Gradio app.
app.launch()