Update app/main.py
Browse files- app/main.py +42 -46
app/main.py
CHANGED
@@ -10,13 +10,15 @@ from datetime import datetime
|
|
10 |
from io import BytesIO
|
11 |
from urllib.parse import quote_plus
|
12 |
from jinja2 import Template
|
13 |
-
import
|
14 |
|
15 |
# —— 环境变量读取 —— #
|
16 |
BAIDU_API_KEY = os.getenv("BAIDU_API_KEY", "")
|
17 |
BAIDU_SECRET_KEY = os.getenv("BAIDU_SECRET_KEY", "")
|
18 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
19 |
-
|
|
|
|
|
20 |
|
21 |
# —— Baidu OCR —— #
|
22 |
def get_access_token(api_key, secret_key):
|
@@ -32,9 +34,8 @@ def get_access_token(api_key, secret_key):
|
|
32 |
return resp.json().get("access_token")
|
33 |
|
34 |
def ocr_image(image_bytes: bytes, token: str):
|
35 |
-
|
36 |
-
|
37 |
-
img_encoded = quote_plus(img_b64)
|
38 |
resp = requests.post(
|
39 |
f"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token={token}",
|
40 |
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
@@ -44,102 +45,95 @@ def ocr_image(image_bytes: bytes, token: str):
|
|
44 |
}
|
45 |
)
|
46 |
resp.raise_for_status()
|
47 |
-
|
48 |
-
return data.get("words_result", [])
|
49 |
|
50 |
# —— 文本高亮 —— #
|
51 |
def highlight_brackets(text: str) -> str:
    """Convert bracket-annotated essay text into highlighted HTML.

    Square brackets mark corrected text and are rendered green; parentheses
    mark the original mistakes and are rendered red. The delimiters
    themselves are dropped and only the enclosed text is wrapped in a
    ``<span>``. Newlines become ``<br>``.

    Args:
        text: Plain text that uses ``[...]`` and ``(...)`` as annotations.

    Returns:
        An HTML fragment that is safe to embed as element content.
    """
    # Local import so this function does not depend on the module's import list.
    from html import escape

    # The text comes from OCR and model output and ends up in an HTML page,
    # so markup characters must be neutralised before spans are added.
    # quote=False: the result is only used as element content, never inside
    # an attribute value, so quotes and apostrophes can stay readable.
    text = escape(text, quote=False)

    # Square brackets green; parentheses red. Empty or nested pairs are
    # left untouched because the patterns require at least one
    # non-delimiter character.
    text = re.sub(r'\[([^\[\]]+)\]', r'<span class="highlight-bracket-green">\1</span>', text)
    text = re.sub(r'\(([^\(\)]+)\)', r'<span class="highlight-bracket">\1</span>', text)
    return text.replace("\n", "<br>")
|
56 |
|
57 |
# —— 主处理函数 —— #
|
58 |
def process(image_pil):
|
59 |
-
#
|
60 |
buf = BytesIO()
|
61 |
image_pil.save(buf, format="PNG")
|
62 |
image_bytes = buf.getvalue()
|
63 |
|
64 |
-
#
|
65 |
-
token
|
66 |
-
words
|
67 |
essay_text = "\n".join([w["words"] for w in words])
|
68 |
|
69 |
-
#
|
70 |
fmt_prompt = (
|
71 |
"请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
|
72 |
"保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
|
73 |
"拼写错误保留:\n\n" + essay_text
|
74 |
)
|
75 |
-
|
76 |
model="gpt-4o-mini",
|
77 |
messages=[{"role": "user", "content": fmt_prompt}]
|
78 |
)
|
79 |
-
revised =
|
80 |
|
81 |
-
#
|
82 |
corr_prompt = (
|
83 |
"请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
|
84 |
"原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
|
85 |
)
|
86 |
-
|
87 |
model="gpt-4o-mini",
|
88 |
messages=[{"role": "user", "content": corr_prompt}]
|
89 |
)
|
90 |
-
corrected =
|
91 |
|
92 |
-
#
|
93 |
review_prompt = (
|
94 |
"下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n" + corrected
|
95 |
)
|
96 |
-
|
97 |
model="gpt-4o-mini",
|
98 |
messages=[{"role": "user", "content": review_prompt}]
|
99 |
)
|
100 |
-
review =
|
101 |
|
102 |
-
#
|
103 |
rate_prompt = (
|
104 |
"请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n" + revised
|
105 |
)
|
106 |
-
|
107 |
model="gpt-4o-mini",
|
108 |
messages=[{"role": "user", "content": rate_prompt}]
|
109 |
)
|
110 |
-
rating =
|
111 |
|
112 |
-
#
|
113 |
rewrite_prompt = (
|
114 |
"请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
|
115 |
)
|
116 |
-
|
117 |
model="gpt-4o-mini",
|
118 |
messages=[{"role": "user", "content": rewrite_prompt}]
|
119 |
)
|
120 |
-
perfect =
|
121 |
|
122 |
-
#
|
123 |
-
code
|
124 |
tpl_path = os.path.join("app", "templates", "base.html")
|
125 |
with open(tpl_path, encoding="utf-8") as f:
|
126 |
tpl = Template(f.read())
|
127 |
|
128 |
html_content = (
|
129 |
-
"<h2>原文格式化</h2>"
|
130 |
-
+
|
131 |
-
|
132 |
-
+
|
133 |
-
|
134 |
-
+ markdown.markdown(review)
|
135 |
-
+ "<h2>评分</h2>"
|
136 |
-
+ markdown.markdown(rating)
|
137 |
-
+ "<h2>优秀范文</h2>"
|
138 |
-
+ markdown.markdown(perfect)
|
139 |
)
|
140 |
full_html = tpl.render(code=code, content=html_content)
|
141 |
|
142 |
-
#
|
143 |
output_dir = os.path.join("app", "output")
|
144 |
os.makedirs(output_dir, exist_ok=True)
|
145 |
html_path = os.path.join(output_dir, f"{code}.html")
|
@@ -148,19 +142,21 @@ def process(image_pil):
|
|
148 |
with open(html_path, "w", encoding="utf-8") as f:
|
149 |
f.write(full_html)
|
150 |
|
151 |
-
pdfkit.from_string(
|
152 |
-
|
|
|
|
|
153 |
|
154 |
return full_html, html_path, pdf_path
|
155 |
|
156 |
# —— Gradio 接口 —— #
|
157 |
with gr.Blocks(title="英语作文批改") as demo:
|
158 |
gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
|
159 |
-
image_in
|
160 |
output_html = gr.HTML()
|
161 |
-
btn
|
162 |
-
file_html
|
163 |
-
file_pdf
|
164 |
|
165 |
btn.click(
|
166 |
fn=process,
|
|
|
10 |
from io import BytesIO
|
11 |
from urllib.parse import quote_plus
|
12 |
from jinja2 import Template
|
13 |
+
from openai import OpenAI
|
14 |
|
15 |
# —— 环境变量读取 —— #
|
16 |
BAIDU_API_KEY = os.getenv("BAIDU_API_KEY", "")
|
17 |
BAIDU_SECRET_KEY = os.getenv("BAIDU_SECRET_KEY", "")
|
18 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
19 |
+
|
20 |
+
# —— 初始化 OpenAI v1 客户端 —— #
|
21 |
+
client = OpenAI(api_key=OPENAI_API_KEY)
|
22 |
|
23 |
# —— Baidu OCR —— #
|
24 |
def get_access_token(api_key, secret_key):
|
|
|
34 |
return resp.json().get("access_token")
|
35 |
|
36 |
def ocr_image(image_bytes: bytes, token: str):
|
37 |
+
img_b64 = base64.b64encode(image_bytes).decode()
|
38 |
+
img_encoded = quote_plus(img_b64) # URL 编码
|
|
|
39 |
resp = requests.post(
|
40 |
f"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token={token}",
|
41 |
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
|
|
45 |
}
|
46 |
)
|
47 |
resp.raise_for_status()
|
48 |
+
return resp.json().get("words_result", [])
|
|
|
49 |
|
50 |
# —— 文本高亮 —— #
|
51 |
def highlight_brackets(text: str) -> str:
    """Convert bracket-annotated essay text into highlighted HTML.

    Square brackets mark corrected text and are rendered green; parentheses
    mark the original mistakes and are rendered red. The delimiters
    themselves are dropped and only the enclosed text is wrapped in a
    ``<span>``. Newlines become ``<br>``.

    Args:
        text: Plain text that uses ``[...]`` and ``(...)`` as annotations.

    Returns:
        An HTML fragment that is safe to embed as element content.
    """
    # Local import so this function does not depend on the module's import list.
    from html import escape

    # The text comes from OCR and model output and ends up in an HTML page,
    # so markup characters must be neutralised before spans are added.
    # quote=False: the result is only used as element content, never inside
    # an attribute value, so quotes and apostrophes can stay readable.
    text = escape(text, quote=False)

    # Square brackets green; parentheses red. Empty or nested pairs are
    # left untouched because the patterns require at least one
    # non-delimiter character.
    text = re.sub(r'\[([^\[\]]+)\]', r'<span class="highlight-bracket-green">\1</span>', text)
    text = re.sub(r'\(([^\(\)]+)\)', r'<span class="highlight-bracket">\1</span>', text)
    return text.replace("\n", "<br>")
|
55 |
|
56 |
# —— 主处理函数 —— #
|
57 |
def process(image_pil):
|
58 |
+
# —— PIL 转 bytes —— #
|
59 |
buf = BytesIO()
|
60 |
image_pil.save(buf, format="PNG")
|
61 |
image_bytes = buf.getvalue()
|
62 |
|
63 |
+
# —— OCR 提取原文 —— #
|
64 |
+
token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
|
65 |
+
words = ocr_image(image_bytes, token)
|
66 |
essay_text = "\n".join([w["words"] for w in words])
|
67 |
|
68 |
+
# —— 1. 格式化原文 —— #
|
69 |
fmt_prompt = (
|
70 |
"请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
|
71 |
"保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
|
72 |
"拼写错误保留:\n\n" + essay_text
|
73 |
)
|
74 |
+
fm_resp = client.chat.completions.create(
|
75 |
model="gpt-4o-mini",
|
76 |
messages=[{"role": "user", "content": fmt_prompt}]
|
77 |
)
|
78 |
+
revised = fm_resp.choices[0].message.content
|
79 |
|
80 |
+
# —— 2. 批改 —— #
|
81 |
corr_prompt = (
|
82 |
"请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
|
83 |
"原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
|
84 |
)
|
85 |
+
cm_resp = client.chat.completions.create(
|
86 |
model="gpt-4o-mini",
|
87 |
messages=[{"role": "user", "content": corr_prompt}]
|
88 |
)
|
89 |
+
corrected = cm_resp.choices[0].message.content
|
90 |
|
91 |
+
# —— 3. 批改意见 —— #
|
92 |
review_prompt = (
|
93 |
"下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n" + corrected
|
94 |
)
|
95 |
+
rm_resp = client.chat.completions.create(
|
96 |
model="gpt-4o-mini",
|
97 |
messages=[{"role": "user", "content": review_prompt}]
|
98 |
)
|
99 |
+
review = rm_resp.choices[0].message.content
|
100 |
|
101 |
+
# —— 4. 评分 —— #
|
102 |
rate_prompt = (
|
103 |
"请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n" + revised
|
104 |
)
|
105 |
+
rr_resp = client.chat.completions.create(
|
106 |
model="gpt-4o-mini",
|
107 |
messages=[{"role": "user", "content": rate_prompt}]
|
108 |
)
|
109 |
+
rating = rr_resp.choices[0].message.content
|
110 |
|
111 |
+
# —— 5. 优秀范文 —— #
|
112 |
rewrite_prompt = (
|
113 |
"请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
|
114 |
)
|
115 |
+
wm_resp = client.chat.completions.create(
|
116 |
model="gpt-4o-mini",
|
117 |
messages=[{"role": "user", "content": rewrite_prompt}]
|
118 |
)
|
119 |
+
perfect = wm_resp.choices[0].message.content
|
120 |
|
121 |
+
# —— 6. 渲染 HTML —— #
|
122 |
+
code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
123 |
tpl_path = os.path.join("app", "templates", "base.html")
|
124 |
with open(tpl_path, encoding="utf-8") as f:
|
125 |
tpl = Template(f.read())
|
126 |
|
127 |
html_content = (
|
128 |
+
"<h2>原文格式化</h2>" + markdown.markdown(revised) +
|
129 |
+
"<h2>批改结果</h2>" + highlight_brackets(corrected) +
|
130 |
+
"<h2>批改意见</h2>" + markdown.markdown(review) +
|
131 |
+
"<h2>评分</h2>" + markdown.markdown(rating) +
|
132 |
+
"<h2>优秀范文</h2>" + markdown.markdown(perfect)
|
|
|
|
|
|
|
|
|
|
|
133 |
)
|
134 |
full_html = tpl.render(code=code, content=html_content)
|
135 |
|
136 |
+
# —— 7. 写文件并返回 —— #
|
137 |
output_dir = os.path.join("app", "output")
|
138 |
os.makedirs(output_dir, exist_ok=True)
|
139 |
html_path = os.path.join(output_dir, f"{code}.html")
|
|
|
142 |
with open(html_path, "w", encoding="utf-8") as f:
|
143 |
f.write(full_html)
|
144 |
|
145 |
+
pdfkit.from_string(
|
146 |
+
full_html, pdf_path,
|
147 |
+
options={"enable-local-file-access": ""}
|
148 |
+
)
|
149 |
|
150 |
return full_html, html_path, pdf_path
|
151 |
|
152 |
# —— Gradio 接口 —— #
|
153 |
with gr.Blocks(title="英语作文批改") as demo:
|
154 |
gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
|
155 |
+
image_in = gr.Image(type="pil", label="上传照片")
|
156 |
output_html = gr.HTML()
|
157 |
+
btn = gr.Button("开始批改")
|
158 |
+
file_html = gr.File(label="下载 HTML")
|
159 |
+
file_pdf = gr.File(label="下载 PDF")
|
160 |
|
161 |
btn.click(
|
162 |
fn=process,
|