Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,35 @@ output_dir = "./question_bank"
|
|
15 |
# Load the question-bank index: { "<grade>年級_<term>學期": [question-bank file names] }.
# A context manager closes the handle promptly, and the explicit UTF-8 encoding
# keeps the non-ASCII keys readable regardless of the host locale.
with open(f"{output_dir}/question_bank_dict.json", "r", encoding="utf-8") as bank_file:
    question_bank_dict = json.load(bank_file)
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
19 |
"""
|
20 |
根據年級、學期以及指定的題型(qtype)和題目數量(num_questions),
|
@@ -33,17 +62,14 @@ def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
|
33 |
grade_semester = f"{grades[grade]}年級_{terms[term]}學期"
|
34 |
|
35 |
# 根據題庫字典從指定年級學期中取得所有檔名,並嘗試讀取其對應的 markdown 檔案內容
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
38 |
|
39 |
-
|
40 |
-
filename = filename.replace('.pdf', '.md')
|
41 |
-
if not os.path.exists(f"{output_dir}/md/{filename}"): continue
|
42 |
-
|
43 |
-
raw_questions.append(open(f"{output_dir}/md/{filename}").read())
|
44 |
|
45 |
-
print(len(raw_questions))
|
46 |
-
|
47 |
# 隨機選取並限制字串長度的題庫內容
|
48 |
input_question_bank = random_questions_with_limit(raw_questions, 20000)
|
49 |
|
@@ -98,7 +124,6 @@ def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
|
98 |
]
|
99 |
|
100 |
# 使用 InferenceClient 呼叫 API 模型產生新題目
|
101 |
-
# 請自行挑選使用哪一個開源大語言模型,並自行調整模型輸入參數,看看會產生什麼結果
|
102 |
completion = client.chat.completions.create(
|
103 |
model="mistralai/Mistral-Nemo-Instruct-2407",
|
104 |
messages=messages,
|
@@ -108,33 +133,6 @@ def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
|
108 |
# 傳回模型產生的文本
|
109 |
return completion.choices[0].message.content
|
110 |
|
111 |
-
def random_questions_with_limit(data, limit=20000):
    """
    Randomly pick items from ``data`` while keeping their combined length within ``limit``.

    Args:
        data: Iterable of strings to choose from. It is never modified.
        limit: Maximum total number of characters across the selected items.
            Values below 5000 are raised to 5000. The separators used to build
            the joined string are not counted against the limit.

    Returns:
        A tuple ``(result_list, result_str, count)``:
        - result_list: the selected items, in shuffled order
        - result_str: the selected items joined by two newline characters
        - count: the number of selected items
    """
    # Enforce the 5000-character floor on the budget.
    limit = max(limit, 5000)

    # Shuffle a copy: shuffling ``data`` itself would silently reorder the
    # caller's list as a side effect.
    shuffled = list(data)
    random.shuffle(shuffled)

    result_list = []
    current_length = 0
    for item in shuffled:
        # Greedy selection: an item that would overflow the budget is skipped,
        # but scanning continues because a later, shorter item may still fit.
        if current_length + len(item) <= limit:
            result_list.append(item)
            current_length += len(item)

    return result_list, "\n\n".join(result_list), len(result_list)
|
137 |
-
|
138 |
# 建立 Gradio 介面
|
139 |
with gr.Blocks() as app:
|
140 |
# 介面標題區
|
|
|
15 |
# Load the question-bank index: { "<grade>年級_<term>學期": [question-bank file names] }.
# A context manager closes the handle promptly, and the explicit UTF-8 encoding
# keeps the non-ASCII keys readable regardless of the host locale.
with open(f"{output_dir}/question_bank_dict.json", "r", encoding="utf-8") as bank_file:
    question_bank_dict = json.load(bank_file)

# Debug output: dump the loaded index once at start-up.
print(question_bank_dict)
|
19 |
+
|
20 |
+
def random_questions_with_limit(data, limit=20000):
    """
    Randomly pick items from ``data`` while keeping their combined length within ``limit``.

    Args:
        data: Iterable of strings to choose from. It is never modified.
        limit: Maximum total number of characters across the selected items.
            Values below 5000 are raised to 5000. The separators used to build
            the joined string are not counted against the limit.

    Returns:
        A tuple ``(result_list, result_str, count)``:
        - result_list: the selected items, in shuffled order
        - result_str: the selected items joined by two newline characters
        - count: the number of selected items
    """
    # Enforce the 5000-character floor on the budget.
    limit = max(limit, 5000)

    # Shuffle a copy: shuffling ``data`` itself would silently reorder the
    # caller's list as a side effect.
    shuffled = list(data)
    random.shuffle(shuffled)

    result_list = []
    current_length = 0
    for item in shuffled:
        # Greedy selection: an item that would overflow the budget is skipped,
        # but scanning continues because a later, shorter item may still fit.
        if current_length + len(item) <= limit:
            result_list.append(item)
            current_length += len(item)

    return result_list, "\n\n".join(result_list), len(result_list)
|
46 |
+
|
47 |
def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
48 |
"""
|
49 |
根據年級、學期以及指定的題型(qtype)和題目數量(num_questions),
|
|
|
62 |
grade_semester = f"{grades[grade]}年級_{terms[term]}學期"
|
63 |
|
64 |
# 根據題庫字典從指定年級學期中取得所有檔名,並嘗試讀取其對應的 markdown 檔案內容
|
65 |
+
raw_questions = [
|
66 |
+
open(f"{output_dir}/md/{doc_path.replace('.pdf', '.md')}").read()
|
67 |
+
for doc_path in question_bank_dict[grade_semester]
|
68 |
+
if os.path.exists(f"{output_dir}/md/{doc_path.replace('.pdf', '.md')}")
|
69 |
+
]
|
70 |
|
71 |
+
print(raw_questions)
|
|
|
|
|
|
|
|
|
72 |
|
|
|
|
|
73 |
# 隨機選取並限制字串長度的題庫內容
|
74 |
input_question_bank = random_questions_with_limit(raw_questions, 20000)
|
75 |
|
|
|
124 |
]
|
125 |
|
126 |
# 使用 InferenceClient 呼叫 API 模型產生新題目
|
|
|
127 |
completion = client.chat.completions.create(
|
128 |
model="mistralai/Mistral-Nemo-Instruct-2407",
|
129 |
messages=messages,
|
|
|
133 |
# 傳回模型產生的文本
|
134 |
return completion.choices[0].message.content
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
# 建立 Gradio 介面
|
137 |
with gr.Blocks() as app:
|
138 |
# 介面標題區
|