File size: 9,496 Bytes
cd42151
 
a432d4a
 
 
 
94d357f
 
a432d4a
 
94d357f
a432d4a
94d357f
 
 
 
 
 
 
 
 
 
 
d23814c
94d357f
 
 
 
 
 
 
a432d4a
 
 
 
 
 
 
8e83a33
 
 
 
ab50cdb
 
8e83a33
483cbac
 
 
 
ab50cdb
 
8e83a33
ab50cdb
8e83a33
f6abac4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a432d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
fb9a6a1
 
 
 
 
 
 
a432d4a
 
 
 
f6abac4
 
 
 
 
a432d4a
cd42151
a432d4a
 
9eb7bd1
a432d4a
 
 
 
 
 
 
94d357f
a432d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94d357f
a432d4a
 
 
e1f9cb7
 
 
 
 
 
 
 
 
 
 
a432d4a
 
 
 
 
 
cd42151
a432d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
94d357f
 
 
 
 
 
 
 
a432d4a
94d357f
 
 
 
 
 
 
 
9eb7bd1
 
e23fa96
a432d4a
e23fa96
a432d4a
 
 
 
 
ed4682a
a432d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab50cdb
a432d4a
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# -*- coding: utf-8 -*-

import json
import os
import random
import gradio as gr
from google import genai
from google.genai import types

# Legacy Hugging Face / Groq Inference API setup, kept for reference only:
# from huggingface_hub import InferenceClient

# API_KEY = os.environ.get("GROQ_API_KEY")
# API_BASE_URL = os.environ.get("GROQ_API_BASE_URL")

# client = InferenceClient(
#     api_key=API_KEY,
#     base_url=API_BASE_URL
# )

# Google Gemini client; requires GOOGLE_API_KEY to be set in the environment.
API_KEY = os.environ.get("GOOGLE_API_KEY")
client = genai.Client(api_key=API_KEY)

def generate_content(model="gemini-2.5-pro-exp-03-25", sys_prompt=None, user_prompt=None):
    """Send a prompt to the Gemini API and return the response text.

    The system prompt is passed as a system instruction in the request
    config; the user prompt becomes the request contents.
    """
    config = types.GenerateContentConfig(system_instruction=sys_prompt)
    response = client.models.generate_content(
        model=model,
        config=config,
        contents=user_prompt,
    )
    return response.text

# Directory holding the pre-built question bank files.
output_dir = "./question_bank"

# Question-bank index, formatted as { "<grade>年級_<term>學期": [question file names] }.
# Use a context manager so the JSON file handle is closed
# (the original json.load(open(...)) leaked the handle).
with open(f"{output_dir}/question_bank_dict.json", "r") as _qb_file:
    question_bank_dict = json.load(_qb_file)

import re

def remove_think_tags(content):
    """Strip literal ``<think>`` / ``</think>`` tag markers from *content*.

    Note: only the tag markers themselves are removed — the text between
    them is kept, so any chain-of-thought reasoning remains visible in the
    output. (The original docstring incorrectly claimed the enclosed
    content was removed as well.)

    Args:
        content: model output that may contain think tags.

    Returns:
        The input with tag markers removed and surrounding whitespace
        stripped.
    """
    cleaned_content = content.replace('<think>', '').replace('</think>', '')
    return cleaned_content.strip()

def random_questions_with_limit(data, limit=20000):
    """Randomly pick questions from *data*, capping the total string length.

    The limit is clamped to a minimum of 5000 characters. The caller's
    list is NOT modified (the original shuffled *data* in place, mutating
    the caller's question bank).

    Args:
        data: list of question strings to sample from.
        limit: maximum combined character count of the selection.

    Returns:
        A 3-tuple of:
        - result_list: the selected questions, in random order
        - result_str: the selection joined with blank-line separators
        - count: how many questions were selected
    """
    # Never allow a context window smaller than 5000 characters.
    limit = max(limit, 5000)

    # Shuffle a copy so the caller's list is left untouched.
    shuffled = random.sample(data, len(data))

    result_list = []
    current_length = 0

    for item in shuffled:
        # Skip items that would exceed the limit, but keep scanning:
        # a later, shorter item may still fit.
        if current_length + len(item) <= limit:
            result_list.append(item)
            current_length += len(item)

    return result_list, "\n\n".join(result_list), len(result_list)

def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
    """
    Generate new math questions for a given grade and term.

    Existing question-bank markdown files are sampled as few-shot context
    and handed to the Gemini model, which writes fresh questions in a
    similar style (responses are in Traditional Chinese).

    Args:
        grade: school grade as a string, "1" through "6" (from the UI dropdown).
        term: "First" or "Second".
        qtype: question-type label, or "Unspecified" for a mix of types.
        num_questions: number of questions to request from the model.

    Returns:
        The model's generated questions and answers as markdown text.
    """

    # Map the UI's English choices onto the Chinese labels used by the bank.
    grades = {"1": "一", "2": "二", "3": "三", "4": "四", "5": "五", "6": "六"}
    terms = {"First": "上", "Second": "下"}

    # With no specific type requested, ask for a mix of all supported types.
    if qtype == "Unspecified":
        qtype = "Multiple-choice/Fill-in-the-blank/Short-answer/True-False"

    print(f"""
    Grade = {grade}
    Term = {term}
    QType = {qtype}
    NumQuestions = {num_questions}
    """)

    # Key into question_bank_dict, e.g. "一年級_上學期".
    grade_semester = f"{grades[grade]}年級_{terms[term]}學期"

    # Read every existing markdown file for this grade/term. A context
    # manager closes each handle (the original list comprehension opened
    # one file per document and never closed them).
    raw_questions = []
    for doc_path in question_bank_dict[grade_semester]:
        md_path = f"{output_dir}/md/{doc_path.replace('.pdf', '.md')}"
        if os.path.exists(md_path):
            with open(md_path) as md_file:
                raw_questions.append(md_file.read())

    print(f"Retrieved {len(raw_questions)} document(s)")

    # Randomly sample bank content, capped at ~10k characters of context.
    input_question_bank = random_questions_with_limit(raw_questions, 10000)

    # System message sets the generation contract; user message carries the
    # sampled bank plus per-request parameters. The model must produce new
    # questions in a similar style without copying the bank verbatim.
    messages = [
        {
            "role": "system",
            "content": """
            You are an advanced AI assistant designed to generate educational questions. Your task is to create new and diverse questions based on existing question banks. The new questions should be similar in difficulty, style, and structure to the input questions but must not directly replicate them. **All responses must be in Traditional Chinese.**

            Key Instructions:
            1. Maintain the educational purpose of the questions.
            2. Ensure diversity in phrasing, answer options, and scenarios.
            3. Adjust the difficulty level only if specified by the user.
            4. Support multiple question types, including:
            - Multiple-choice questions
            - Fill-in-the-blank
            - Short-answer questions
            - True/false questions
            5. Provide a detailed and accurate answer key for each generated question.
            6. When applicable, use creative but relevant contexts (e.g., real-life scenarios, stories, or analogies) to make questions engaging.

            Output Format:
            - Clearly distinguish the new question and the answer.
            - If multiple questions are generated, number them sequentially.
            - Ensure clarity and correctness in all generated content.
            - Respond only in **Traditional Chinese**."""
        },
        {
            "role": "user",
            "content": f"""
            I have the following question bank. Please follow these guidelines to generate new, diverse, and pedagogically appropriate questions:
            1. **Maintain strict alignment with original question formats** - For multiple choice questions provide 4 fully developed options; for true/false questions include both statement variations; for fill-in-the-blank precisely indicate blank locations with underscores/braces
            2. **Structure output with clear question components**:
               - Multiple Choice: [Stem] > [Options A-D] > [Correct Answer]
               - True/False: [Statement] > [Answer Rationale]
               - Fill-in-the-Blank: [Context with clearly marked blanks] > [Answer Key]
            3. Ensure cognitive consistency with original difficulty level and subject focus
            4. Include annotated answer explanations for each generated question
            5. **Organize output in a table format with columns: Question Type, Question Structure, Answer Details**

            Input Question Bank:
            {input_question_bank[1]}

            Requirements for the generated questions:
            - Number of new questions: {num_questions}
            - Include the following types: {qtype}

            Example Output Format:
            1. Question: [New Question 1]
            Answer: [Answer 1]

            2. Question: [New Question 2]
            Answer: [Answer 2]

            3. Question: [New Question 3]
            Answer: [Answer 3]
            """
        }
    ]

    # Call Gemini with the system/user prompt pair assembled above.
    response = generate_content(
        sys_prompt=messages[0]['content'],
        user_prompt=messages[1]['content']
    )
    print(response)

    return response

# Build the Gradio interface.
with gr.Blocks() as app:
    # Title / intro banner.
    gr.Markdown("""
    # 🤖🧮 AI Math Quiz Maker

    **Need some math questions? Let me help! Choose the grade, term, type of questions, and number of questions, and I'll generate them for you!**
    """)

    # Grade and term selectors, laid out side by side.
    with gr.Row():
        grade = gr.Dropdown(label="🎓 Select Grade", choices=["1", "2", "3", "4", "5", "6"], value="6")
        term = gr.Radio(label="📆 Select Term", choices=["First", "Second"], value="First")

    # Question type and question count controls.
    qtype = gr.Radio(label="✏️ Select Question Type", choices=["Multiple-choice", "Fill-in-the-blank", "Short-answer", "True-False", "Unspecified"], value="Multiple-choice")
    num_questions = gr.Number(label="🔢 Number of Questions", value=10, precision=0, maximum=20, minimum=1)

    # Generate button and markdown output area.
    generate_button = gr.Button("🚀 Generate Questions")
    output = gr.Markdown("📝 Your questions will appear here!", label="📝 Your questions")

    # On click, run generate_math_questions and render the result below.
    generate_button.click(generate_math_questions, inputs=[grade, term, qtype, num_questions], outputs=output)

# Start the Gradio app.
app.launch()