Create app/main.py
Browse files- app/main.py +148 -0
app/main.py
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
import requests
|
4 |
+
import markdown
|
5 |
+
import pdfkit
|
6 |
+
import gradio as gr
|
7 |
+
import re
|
8 |
+
import random
|
9 |
+
from datetime import datetime
|
10 |
+
from jinja2 import Template
|
11 |
+
import openai
|
12 |
+
|
13 |
+
# —— 环境变量读取 —— #
|
14 |
+
# Service credentials are taken from the process environment; each one falls
# back to an empty string when the variable is not set.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Register the OpenAI key on the module once, at import time.
openai.api_key = OPENAI_API_KEY
|
18 |
+
|
19 |
+
# —— Baidu OCR —— #
|
20 |
+
def get_access_token(api_key, secret_key):
    """Request an OAuth access token from the Baidu token endpoint.

    Posts a ``client_credentials`` grant carrying the given key pair as
    query parameters.

    Args:
        api_key: Value sent as ``client_id``.
        secret_key: Value sent as ``client_secret``.

    Returns:
        The ``access_token`` field of the JSON response, or ``None`` when the
        response contains no such field.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-success HTTP status.
    """
    resp = requests.post(
        "https://aip.baidubce.com/oauth/2.0/token",
        params={
            "grant_type": "client_credentials",
            "client_id": api_key,
            "client_secret": secret_key,
        },
        # requests waits indefinitely by default; bound the wait so a stalled
        # server cannot hang the whole grading request.
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of parsing the error body as if it
    # were a successful token response.
    resp.raise_for_status()
    return resp.json().get("access_token")
|
30 |
+
|
31 |
+
def ocr_image(image_bytes, token):
    """Send an image to the Baidu handwriting OCR endpoint.

    The image is base64-encoded and posted as a form field together with
    ``language_type=ENG``.

    Args:
        image_bytes: Raw bytes of the image file.
        token: Access token appended to the request URL.

    Returns:
        The ``words_result`` list from the JSON response, or an empty list
        when the field is absent.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-success HTTP status.
        RuntimeError: When the response body carries an ``error_code``.
    """
    img_b64 = base64.b64encode(image_bytes).decode()
    resp = requests.post(
        f"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token={token}",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={"image": img_b64, "language_type": "ENG"},
        # Bound the wait; requests has no default timeout.
        timeout=60,
    )
    resp.raise_for_status()
    result = resp.json()

    # NOTE(review): the API appears to report failures as an
    # ``error_code``/``error_msg`` pair inside the body — confirm against the
    # Baidu OCR docs. Raising here keeps a failed call from being mistaken
    # for an image that simply contains no text.
    if "error_code" in result:
        raise RuntimeError(
            f"Baidu OCR failed: {result.get('error_code')} {result.get('error_msg', '')}"
        )
    return result.get("words_result", [])
|
39 |
+
|
40 |
+
# —— 文本高亮 —— #
|
41 |
+
def highlight_brackets(text):
    """Turn bracket-annotated plain text into highlighted HTML.

    ``[...]`` segments are wrapped in a ``highlight-bracket-green`` span and
    ``(...)`` segments in a ``highlight-bracket`` span; the delimiters
    themselves are dropped. Empty or nested-delimiter segments are left
    untouched because each pattern requires at least one non-bracket
    character. Newlines become ``<br>``.

    The input is HTML-escaped first, so ``<``, ``>`` and ``&`` in the text
    are displayed literally instead of being interpreted as markup.

    Args:
        text: Plain text that may contain ``[...]`` and ``(...)`` markers.

    Returns:
        An HTML fragment as a string.
    """
    # Imported locally so this function does not depend on a file-level import.
    import html

    # Escape before inserting our own tags; quotes are left alone because the
    # text only ever lands in element content, never inside an attribute.
    text = html.escape(text, quote=False)

    # Square brackets -> "green" span; parentheses -> default highlight span.
    text = re.sub(r'\[([^\[\]]+)\]', r'<span class="highlight-bracket-green">\1</span>', text)
    text = re.sub(r'\(([^\(\)]+)\)', r'<span class="highlight-bracket">\1</span>', text)
    return text.replace("\n", "<br>")
|
46 |
+
|
47 |
+
# —— 主处理函数 —— #
|
48 |
+
def _read_upload(image):
    """Return the raw bytes of an upload, whatever form it arrives in."""
    if image is None:
        raise ValueError("no image was uploaded")
    if isinstance(image, (bytes, bytearray)):
        return bytes(image)
    if isinstance(image, (str, os.PathLike)):
        with open(image, "rb") as fh:
            return fh.read()
    # Temp-file style object: prefer re-opening its path in binary mode so a
    # closed, text-mode, or already-consumed handle does not matter.
    path = getattr(image, "name", None)
    if isinstance(path, str) and os.path.isfile(path):
        with open(path, "rb") as fh:
            return fh.read()
    return image.read()


def _ask_model(prompt):
    """Send *prompt* as one user message to gpt-4o-mini and return the reply text."""
    reply = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    return reply.choices[0].message.content


def process(image):
    """Grade an uploaded essay photo and write an HTML and a PDF report.

    Steps: OCR the image, ask the model to tidy the transcription, correct
    the grammar, write review comments, score the essay and rewrite it as a
    model essay; then render all sections into ``app/templates/base.html``
    and save the result under ``/app/output``.

    Args:
        image: The uploaded file. Accepted as a path string, raw bytes, or a
            file-like object. NOTE(review): what the Gradio File input
            actually delivers depends on the installed Gradio version —
            confirm against the pinned version.

    Returns:
        A tuple ``(full_html, html_path, pdf_path)``: the rendered HTML
        string and the paths of the written HTML and PDF files.

    Raises:
        ValueError: When no file was uploaded or OCR recognised no text.
    """
    # 1. OCR
    token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
    words = ocr_image(_read_upload(image), token)
    essay_text = "\n".join(w["words"] for w in words)
    if not essay_text.strip():
        # Nothing to grade; stop before spending five model calls on it.
        raise ValueError("OCR returned no text for the uploaded image")

    # 2. Tidy the raw transcription (spelling mistakes are kept on purpose,
    #    as the prompt instructs).
    fmt_prompt = (
        "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
        "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
        "拼写错误保留:\n\n" + essay_text
    )
    revised = _ask_model(fmt_prompt)

    # 3. Grammar correction; the prompt asks for () around original errors
    #    and [] around fixes, which highlight_brackets later colours.
    corr_prompt = (
        "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
        "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
    )
    corrected = _ask_model(corr_prompt)

    # 4. Itemised review comments, based on the annotated correction.
    review_prompt = (
        "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n"
        + corrected
    )
    review = _ask_model(review_prompt)

    # 5. Scoring, based on the tidied (uncorrected) text.
    rate_prompt = (
        "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n"
        + revised
    )
    rating = _ask_model(rate_prompt)

    # 6. Model essay rewrite.
    rewrite_prompt = (
        "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
    )
    perfect = _ask_model(rewrite_prompt)

    # 7. Render HTML. The report code is a 4-digit random number plus a
    #    timestamp and doubles as the output file name.
    code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
    with open("app/templates/base.html", encoding="utf-8") as f:
        tpl = Template(f.read())
    html_content = (
        "<h2>原文格式化</h2>"
        + markdown.markdown(revised)
        + "<h2>批改结果</h2>"
        + highlight_brackets(corrected)
        + "<h2>批改意见</h2>"
        + markdown.markdown(review)
        + "<h2>评分</h2>"
        + markdown.markdown(rating)
        + "<h2>优秀范文</h2>"
        + markdown.markdown(perfect)
    )
    full_html = tpl.render(code=code, content=html_content)

    # Write the HTML file, then convert the same markup to PDF.
    html_path = f"/app/output/{code}.html"
    pdf_path = f"/app/output/{code}.pdf"
    os.makedirs("/app/output", exist_ok=True)
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(full_html)
    # presumably needed so the converter can load local assets referenced by
    # the template — verify against base.html
    pdfkit.from_string(full_html, pdf_path, options={"enable-local-file-access":""})

    return full_html, html_path, pdf_path
|
136 |
+
|
137 |
+
# —— Gradio 接口 —— #
|
138 |
+
# Page layout: components appear in the order they are created below.
with gr.Blocks(title="英语作文批改") as demo:
    gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")

    # Input: a single uploaded file.
    image_in = gr.File(file_count="single", label="上传照片")

    # Outputs: inline preview plus the two downloadable files.
    output_html = gr.HTML()
    btn = gr.Button("开始批改")
    file_html = gr.File(label="下载 HTML")
    file_pdf = gr.File(label="下载 PDF")

    # process() returns (html string, html path, pdf path), matching the
    # three outputs in order.
    btn.click(
        fn=process,
        inputs=image_in,
        outputs=[output_html, file_html, file_pdf],
    )


if __name__ == "__main__":
    # Listen on all interfaces at port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|