Spaces:
Sleeping
Sleeping
File size: 9,496 Bytes
cd42151 a432d4a 94d357f a432d4a 94d357f a432d4a 94d357f d23814c 94d357f a432d4a 8e83a33 ab50cdb 8e83a33 483cbac ab50cdb 8e83a33 ab50cdb 8e83a33 f6abac4 a432d4a fb9a6a1 a432d4a f6abac4 a432d4a cd42151 a432d4a 9eb7bd1 a432d4a 94d357f a432d4a 94d357f a432d4a e1f9cb7 a432d4a cd42151 a432d4a 94d357f a432d4a 94d357f 9eb7bd1 e23fa96 a432d4a e23fa96 a432d4a ed4682a a432d4a ab50cdb a432d4a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# -*- coding: utf-8 -*-
import json
import os
import random
import gradio as gr
from google import genai
from google.genai import types
# Build the shared Gemini client from the GOOGLE_API_KEY environment variable.
# (An earlier revision used huggingface_hub.InferenceClient, configured through
# the GROQ_API_KEY / GROQ_API_BASE_URL environment variables.)
API_KEY = os.getenv("GOOGLE_API_KEY")
client = genai.Client(api_key=API_KEY)
def generate_content(model="gemini-2.5-pro-exp-03-25", sys_prompt=None, user_prompt=None):
    """Send a single prompt to the module-level client and return the reply text.

    Args:
        model: Identifier of the model to query.
        sys_prompt: Text passed as the system instruction (may be None).
        user_prompt: Text passed as the request contents.

    Returns:
        The ``text`` attribute of the response returned by the client.
    """
    generation_config = types.GenerateContentConfig(system_instruction=sys_prompt)
    reply = client.models.generate_content(
        model=model,
        contents=user_prompt,
        config=generation_config,
    )
    return reply.text
# Directory that holds the question bank files.
output_dir = "./question_bank"
# Load the question bank index, shaped as { "<grade>_<semester>": [file names] }.
# A context manager guarantees the handle is closed, and the encoding is given
# explicitly because the index is looked up with Chinese keys, so the platform
# default encoding must not be relied upon.
with open(f"{output_dir}/question_bank_dict.json", "r", encoding="utf-8") as index_file:
    question_bank_dict = json.load(index_file)
import re
def remove_think_tags(content):
    """Strip ``<think>`` and ``</think>`` markers from a piece of model output.

    Only the tag markers themselves are removed; the text that was enclosed
    between them is kept. Leading and trailing whitespace of the result is
    trimmed.

    Args:
        content: The text to clean. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text, or an empty string when ``content`` is ``None`` or
        empty.
    """
    # Guard against a missing response so callers never hit AttributeError.
    if not content:
        return ""
    for tag in ("<think>", "</think>"):
        content = content.replace(tag, "")
    return content.strip()
def random_questions_with_limit(data, limit=20000):
    """Pick a random subset of documents whose joined text fits within ``limit``.

    The documents are visited in random order; each one is kept if adding it
    (plus the blank-line separator that will precede it) keeps the joined
    string within ``limit`` characters. Documents that do not fit are skipped,
    and later, shorter documents may still be added.

    Args:
        data: Iterable of document strings. It is not modified.
        limit: Maximum length, in characters, of the joined result. Values
            below 5000 are raised to 5000.

    Returns:
        A tuple ``(result_list, result_str, count)`` where ``result_list`` is
        the list of selected documents, ``result_str`` is those documents
        joined by a blank line, and ``count`` is the number selected.
    """
    separator = "\n\n"
    # Never go below the 5000-character floor.
    limit = max(limit, 5000)

    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(data)
    random.shuffle(shuffled)

    result_list = []
    current_length = 0
    for item in shuffled:
        # The separator only appears between documents, so the first one is free.
        added_length = len(item) + (len(separator) if result_list else 0)
        if current_length + added_length <= limit:
            result_list.append(item)
            current_length += added_length

    return result_list, separator.join(result_list), len(result_list)
def _load_question_documents(grade_semester):
    """Return the text of every existing markdown document listed for ``grade_semester``."""
    documents = []
    for doc_path in question_bank_dict[grade_semester]:
        md_path = f"{output_dir}/md/{doc_path.replace('.pdf', '.md')}"
        # Index entries without a converted markdown file are skipped.
        if not os.path.exists(md_path):
            continue
        # Close each file promptly and read it as UTF-8 regardless of locale.
        with open(md_path, encoding="utf-8") as md_file:
            documents.append(md_file.read())
    return documents


def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
    """Generate new quiz questions modelled on the stored question bank.

    Reads the markdown documents indexed for the chosen grade and term, takes
    a random, length-limited sample of them, and asks the model (through
    ``generate_content``) for new questions in the same style. The prompts
    instruct the model to answer in Traditional Chinese.

    Args:
        grade: Grade as a string, one of "1" to "6".
        term: "First" or "Second".
        qtype: Requested question type; "Unspecified" expands to all four
            supported types.
        num_questions: Number of questions to request from the model.

    Returns:
        The text returned by ``generate_content``.

    Raises:
        KeyError: If ``grade`` or ``term`` is not a known choice, or the
            question bank index has no entry for that grade and term.
    """
    # Map the English UI choices onto the Chinese labels used as index keys.
    grades = {"1": "一", "2": "二", "3": "三", "4": "四", "5": "五", "6": "六"}
    terms = {"First": "上", "Second": "下"}

    # With no specific type requested, ask for a mix of every supported type.
    if qtype == "Unspecified":
        qtype = "Multiple-choice/Fill-in-the-blank/Short-answer/True-False"

    print(f"""
Grade = {grade}
Term = {term}
QType = {qtype}
NumQuestions = {num_questions}
""")

    # Index key, e.g. "一年級_上學期".
    grade_semester = f"{grades[grade]}年級_{terms[term]}學期"

    raw_questions = _load_question_documents(grade_semester)
    print(f"Retrieved {len(raw_questions)} document(s)")

    # Only the joined text of the random, length-limited sample is needed.
    _, question_bank_text, _ = random_questions_with_limit(raw_questions, 10000)

    # System instruction: produce new questions of similar difficulty and
    # style without copying the question bank verbatim.
    system_prompt = """
You are an advanced AI assistant designed to generate educational questions. Your task is to create new and diverse questions based on existing question banks. The new questions should be similar in difficulty, style, and structure to the input questions but must not directly replicate them. **All responses must be in Traditional Chinese.**
Key Instructions:
1. Maintain the educational purpose of the questions.
2. Ensure diversity in phrasing, answer options, and scenarios.
3. Adjust the difficulty level only if specified by the user.
4. Support multiple question types, including:
- Multiple-choice questions
- Fill-in-the-blank
- Short-answer questions
- True/false questions
5. Provide a detailed and accurate answer key for each generated question.
6. When applicable, use creative but relevant contexts (e.g., real-life scenarios, stories, or analogies) to make questions engaging.
Output Format:
- Clearly distinguish the new question and the answer.
- If multiple questions are generated, number them sequentially.
- Ensure clarity and correctness in all generated content.
- Respond only in **Traditional Chinese**."""

    # User request: the sampled question bank plus the requested amount/types.
    user_prompt = f"""
I have the following question bank. Please follow these guidelines to generate new, diverse, and pedagogically appropriate questions:
1. **Maintain strict alignment with original question formats** - For multiple choice questions provide 4 fully developed options; for true/false questions include both statement variations; for fill-in-the-blank precisely indicate blank locations with underscores/braces
2. **Structure output with clear question components**:
- Multiple Choice: [Stem] > [Options A-D] > [Correct Answer]
- True/False: [Statement] > [Answer Rationale]
- Fill-in-the-Blank: [Context with clearly marked blanks] > [Answer Key]
3. Ensure cognitive consistency with original difficulty level and subject focus
4. Include annotated answer explanations for each generated question
5. **Organize output in a table format with columns: Question Type, Question Structure, Answer Details**
Input Question Bank:
{question_bank_text}
Requirements for the generated questions:
- Number of new questions: {num_questions}
- Include the following types: {qtype}
Example Output Format:
1. Question: [New Question 1]
Answer: [Answer 1]
2. Question: [New Question 2]
Answer: [Answer 2]
3. Question: [New Question 3]
Answer: [Answer 3]
"""

    response = generate_content(
        sys_prompt=system_prompt,
        user_prompt=user_prompt,
    )
    print(response)
    # Hand the generated text back to the caller (the Gradio output component).
    return response
# Build the Gradio interface.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation; confirm that the question-type and count
# inputs are meant to sit outside the Row.
with gr.Blocks() as app:
    # Title / introduction area.
    gr.Markdown("""
# 🤖🧮 AI Math Quiz Maker
**Need some math questions? Let me help! Choose the grade, term, type of questions, and number of questions, and I'll generate them for you!**
""")
    # Grade and term selectors, laid out side by side.
    with gr.Row():
        grade = gr.Dropdown(label="🎓 Select Grade", choices=["1", "2", "3", "4", "5", "6"], value="6")
        term = gr.Radio(label="📆 Select Term", choices=["First", "Second"], value="First")
    # Question type and number of questions.
    qtype = gr.Radio(label="✏️ Select Question Type", choices=["Multiple-choice", "Fill-in-the-blank", "Short-answer", "True-False", "Unspecified"], value="Multiple-choice")
    num_questions = gr.Number(label="🔢 Number of Questions", value=10, precision=0, maximum=20, minimum=1)
    # Trigger button and the area that shows the generated questions.
    generate_button = gr.Button("🚀 Generate Questions")
    output = gr.Markdown("📝 Your questions will appear here!", label="📝 Your questions")
    # Clicking the button runs generate_math_questions and renders its result.
    generate_button.click(generate_math_questions, inputs=[grade, term, qtype, num_questions], outputs=output)
# Start the Gradio app.
app.launch()