cacode committed on
Commit
ffa3318
·
verified ·
1 Parent(s): d817435

Create app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +148 -0
app/main.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import requests
4
+ import markdown
5
+ import pdfkit
6
+ import gradio as gr
7
+ import re
8
+ import random
9
+ from datetime import datetime
10
+ from jinja2 import Template
11
+ import openai
12
+
13
+ # —— 环境变量读取 —— #
14
# Credentials are read from the process environment; a variable that is
# not set falls back to the empty string.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Hand the key to the openai module once, at import time.
openai.api_key = OPENAI_API_KEY
18
+
19
+ # —— Baidu OCR —— #
20
def get_access_token(api_key, secret_key, timeout=10):
    """Exchange Baidu API credentials for an OAuth access token.

    Posts a ``client_credentials`` grant request to the Baidu AIP token
    endpoint and extracts ``access_token`` from the JSON reply.

    Args:
        api_key: Baidu application API key, sent as ``client_id``.
        secret_key: Baidu application secret key, sent as ``client_secret``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when
        the response body carries no such field.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    resp = requests.post(
        "https://aip.baidubce.com/oauth/2.0/token",
        params={
            "grant_type": "client_credentials",
            "client_id": api_key,
            "client_secret": secret_key,
        },
        # requests never times out on its own; without this a stalled
        # connection would block the caller indefinitely.
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    resp.raise_for_status()
    return resp.json().get("access_token")
30
+
31
def ocr_image(image_bytes, token, timeout=30):
    """Send an image to the Baidu handwriting OCR endpoint.

    The image is base64-encoded and posted as a form field together with
    ``language_type="ENG"``.

    Args:
        image_bytes: Raw bytes of the image file.
        token: Access token for the Baidu API, sent as the ``access_token``
            query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``words_result`` list from the JSON response, or an empty list
        when the response has no such field.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    img_b64 = base64.b64encode(image_bytes).decode()
    resp = requests.post(
        "https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting",
        # Let requests build the query string so the token is URL-encoded
        # rather than spliced into the URL verbatim.
        params={"access_token": token},
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={"image": img_b64, "language_type": "ENG"},
        # requests never times out on its own; bound the wait explicitly.
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json().get("words_result", [])
39
+
40
+ # —— 文本高亮 —— #
41
def highlight_brackets(text):
    """Convert bracket-annotated plain text into an HTML fragment.

    ``[...]`` segments are wrapped in a ``highlight-bracket-green`` span and
    ``(...)`` segments in a ``highlight-bracket`` span (the brackets
    themselves are dropped); newlines become ``<br>``.

    The input is HTML-escaped first, so any ``<``, ``>`` or ``&`` in the
    text is shown literally instead of being interpreted as markup.

    Args:
        text: Plain text containing the bracket annotations.

    Returns:
        The HTML fragment as a string.
    """
    # Local import keeps this function self-contained; the module's import
    # block does not bring in ``html``.
    import html

    # quote=False: quotes are harmless in element content, and leaving them
    # alone keeps apostrophes readable in the generated source.
    escaped = html.escape(text, quote=False)

    # Square brackets first (green), then round brackets (red).
    escaped = re.sub(
        r'\[([^\[\]]+)\]',
        r'<span class="highlight-bracket-green">\1</span>',
        escaped,
    )
    escaped = re.sub(
        r'\(([^\(\)]+)\)',
        r'<span class="highlight-bracket">\1</span>',
        escaped,
    )
    return escaped.replace("\n", "<br>")
46
+
47
+ # —— 主处理函数 —— #
48
def _read_upload(upload):
    """Return the raw bytes of the value delivered by the ``gr.File`` input."""
    if isinstance(upload, (bytes, bytearray)):
        return bytes(upload)
    # Depending on the Gradio version / ``type`` setting the value may be a
    # path string or a temp-file wrapper exposing its path as ``.name``.
    if isinstance(upload, (str, os.PathLike)):
        path = upload
    else:
        path = getattr(upload, "name", None)
    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        # Re-open from disk in binary mode so the result does not depend on
        # the wrapper's open mode or current read position.
        with open(path, "rb") as fh:
            return fh.read()
    # Plain file-like object without a usable path.
    return upload.read()


def _chat(prompt):
    """Send ``prompt`` as a single user message and return the reply text."""
    # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 SDK interface —
    # confirm the openai version pinned for this app.
    reply = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    return reply.choices[0].message.content


def process(image):
    """Run the full essay-correction pipeline for one uploaded photo.

    Steps: OCR the image, ask the model to tidy the recognized text, correct
    it, comment on the corrections, rate it and rewrite it, then render
    everything through ``app/templates/base.html`` and write the page to
    ``/app/output`` as both HTML and PDF.

    Args:
        image: The value of the ``gr.File`` input: a path string, raw bytes,
            or a file-like object (optionally exposing its path as
            ``.name``).

    Returns:
        A ``(full_html, html_path, pdf_path)`` tuple: the rendered page and
        the paths of the two files written.

    Raises:
        gr.Error: If nothing was uploaded or OCR recognized no text.
    """
    if image is None:
        # Clicking the button without an upload used to crash on `.read()`.
        raise gr.Error("请先上传作文照片")

    # 1. OCR
    token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
    words = ocr_image(_read_upload(image), token)
    essay_text = "\n".join(w["words"] for w in words)
    if not essay_text.strip():
        # Do not spend five model calls on an empty essay.
        raise gr.Error("未能从图片中识别出文字,请检查图片后重试")

    # 2. Tidy the formatting of the recognized text (spelling errors kept).
    revised = _chat(
        "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
        "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
        "拼写错误保留:\n\n" + essay_text
    )

    # 3. Correction: originals marked with (), fixes marked with [].
    corrected = _chat(
        "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
        "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
    )

    # 4. Itemized comments based on the annotated correction.
    review = _chat(
        "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n"
        + corrected
    )

    # 5. Rating of the tidied original text.
    rating = _chat(
        "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n"
        + revised
    )

    # 6. Model rewrite of the tidied original text.
    perfect = _chat(
        "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
    )

    # 7. Render HTML. The report code is a random 4-digit prefix plus a
    # second-resolution timestamp; it also names the output files.
    code = f"{random.randint(0, 9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
    with open("app/templates/base.html", encoding="utf-8") as f:
        tpl = Template(f.read())
    html_content = (
        "<h2>原文格式化</h2>"
        + markdown.markdown(revised)
        + "<h2>批改结果</h2>"
        + highlight_brackets(corrected)
        + "<h2>批改意见</h2>"
        + markdown.markdown(review)
        + "<h2>评分</h2>"
        + markdown.markdown(rating)
        + "<h2>优秀范文</h2>"
        + markdown.markdown(perfect)
    )
    full_html = tpl.render(code=code, content=html_content)

    # Write the HTML file and its PDF rendering.
    html_path = f"/app/output/{code}.html"
    pdf_path = f"/app/output/{code}.pdf"
    os.makedirs("/app/output", exist_ok=True)
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(full_html)
    pdfkit.from_string(full_html, pdf_path, options={"enable-local-file-access": ""})

    return full_html, html_path, pdf_path
136
+
137
+ # —— Gradio 接口 —— #
138
# UI layout: instructions, upload slot, HTML preview, start button and two
# download slots. Components are created in display order.
demo = gr.Blocks(title="英语作文批改")
with demo:
    gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
    image_in = gr.File(file_count="single", label="上传照片")
    output_html = gr.HTML()
    btn = gr.Button("开始批改")
    file_html = gr.File(label="下载 HTML")
    file_pdf = gr.File(label="下载 PDF")

    # process() returns (html, html_path, pdf_path), matching these outputs.
    result_slots = [output_html, file_html, file_pdf]
    btn.click(fn=process, inputs=image_in, outputs=result_slots)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)