cacode committed on
Commit
a1d82a9
·
verified ·
1 Parent(s): 3af6c22

Update app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +51 -30
app/main.py CHANGED
@@ -7,8 +7,9 @@ import gradio as gr
7
  import re
8
  import random
9
  from datetime import datetime
 
 
10
  from jinja2 import Template
11
- import urllib.parse
12
  import openai
13
 
14
  # —— 环境变量读取 —— #
@@ -30,31 +31,42 @@ def get_access_token(api_key, secret_key):
30
  resp.raise_for_status()
31
  return resp.json().get("access_token")
32
 
33
- def ocr_image(image_bytes, token):
34
- # Base64 encode and URL-encode the image
35
  img_b64 = base64.b64encode(image_bytes).decode()
36
- img_encoded = urllib.parse.quote_plus(img_b64)
37
- url = f"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token={token}"
38
- headers = {"Content-Type": "application/x-www-form-urlencoded"}
39
- data = f"image={img_encoded}&language_type=ENG"
40
- resp = requests.post(url, headers=headers, data=data)
 
 
 
 
41
  resp.raise_for_status()
42
- return resp.json().get("words_result", [])
 
43
 
44
  # —— 文本高亮 —— #
45
- def highlight_brackets(text):
 
46
  text = re.sub(r'\[([^\[\]]+)\]', r'<span class="highlight-bracket-green">\1</span>', text)
47
  text = re.sub(r'\(([^\(\)]+)\)', r'<span class="highlight-bracket">\1</span>', text)
48
  return text.replace("\n", "<br>")
49
 
50
  # —— 主处理函数 —— #
51
- def process(image_bytes: bytes):
52
- # 1. OCR
 
 
 
 
 
53
  token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
54
  words = ocr_image(image_bytes, token)
55
  essay_text = "\n".join([w["words"] for w in words])
56
 
57
- # 2. 格式化原文
58
  fmt_prompt = (
59
  "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
60
  "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
@@ -66,7 +78,7 @@ def process(image_bytes: bytes):
66
  )
67
  revised = fm.choices[0].message.content
68
 
69
- # 3. 批改
70
  corr_prompt = (
71
  "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
72
  "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
@@ -77,10 +89,9 @@ def process(image_bytes: bytes):
77
  )
78
  corrected = cm.choices[0].message.content
79
 
80
- # 4. 批改意见
81
  review_prompt = (
82
- "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n"
83
- + corrected
84
  )
85
  rm = openai.ChatCompletion.create(
86
  model="gpt-4o-mini",
@@ -88,10 +99,9 @@ def process(image_bytes: bytes):
88
  )
89
  review = rm.choices[0].message.content
90
 
91
- # 5. 评分
92
  rate_prompt = (
93
- "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n"
94
- + revised
95
  )
96
  rr = openai.ChatCompletion.create(
97
  model="gpt-4o-mini",
@@ -99,7 +109,7 @@ def process(image_bytes: bytes):
99
  )
100
  rating = rr.choices[0].message.content
101
 
102
- # 6. 优秀范文
103
  rewrite_prompt = (
104
  "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
105
  )
@@ -109,10 +119,12 @@ def process(image_bytes: bytes):
109
  )
110
  perfect = wm.choices[0].message.content
111
 
112
- # 7. 渲染 HTML
113
  code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
114
- with open("app/templates/base.html", encoding="utf-8") as f:
 
115
  tpl = Template(f.read())
 
116
  html_content = (
117
  "<h2>原文格式化</h2>"
118
  + markdown.markdown(revised)
@@ -127,25 +139,34 @@ def process(image_bytes: bytes):
127
  )
128
  full_html = tpl.render(code=code, content=html_content)
129
 
130
- # 写文件
131
- os.makedirs("app/output", exist_ok=True)
132
- html_path = f"app/output/{code}.html"
133
- pdf_path = f"app/output/{code}.pdf"
 
 
134
  with open(html_path, "w", encoding="utf-8") as f:
135
  f.write(full_html)
136
- pdfkit.from_string(full_html, pdf_path, options={"enable-local-file-access": ""})
 
 
137
 
138
  return full_html, html_path, pdf_path
139
 
140
  # —— Gradio 接口 —— #
141
  with gr.Blocks(title="英语作文批改") as demo:
142
  gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
143
- image_in = gr.Image(type="bytes", label="上传照片")
144
  output_html = gr.HTML()
145
  btn = gr.Button("开始批改")
146
  file_html = gr.File(label="下载 HTML")
147
  file_pdf = gr.File(label="下载 PDF")
148
- btn.click(fn=process, inputs=image_in, outputs=[output_html, file_html, file_pdf])
 
 
 
 
 
149
 
150
  if __name__ == "__main__":
151
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
7
  import re
8
  import random
9
  from datetime import datetime
10
+ from io import BytesIO
11
+ from urllib.parse import quote_plus
12
  from jinja2 import Template
 
13
  import openai
14
 
15
  # —— 环境变量读取 —— #
 
31
  resp.raise_for_status()
32
  return resp.json().get("access_token")
33
 
34
def ocr_image(image_bytes: bytes, token: str):
    """Send an image to the Baidu handwriting OCR endpoint and return its results.

    Args:
        image_bytes: Content of the image file to recognise.
        token: Access token appended to the request URL.

    Returns:
        The ``words_result`` value of the JSON response, or an empty list
        when the response has no such key.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    img_b64 = base64.b64encode(image_bytes).decode()
    resp = requests.post(
        f"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token={token}",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        # Hand over the raw base64 text: requests form-encodes a dict payload
        # itself, so quoting it beforehand would double-encode "+", "/" and
        # "=" and the server would no longer receive valid base64.
        data={
            "image": img_b64,
            "language_type": "ENG",
        },
    )
    resp.raise_for_status()
    payload = resp.json()
    return payload.get("words_result", [])
49
 
50
  # —— 文本高亮 —— #
51
def highlight_brackets(text: str) -> str:
    """Wrap bracketed spans in highlight ``<span>`` tags and convert newlines.

    ``[...]`` spans are wrapped with class ``highlight-bracket-green`` and
    ``(...)`` spans with class ``highlight-bracket``; the delimiters are
    dropped from the output. A span only matches when it is non-empty and
    contains no delimiter of its own kind, so for nested brackets only the
    innermost pair is replaced. Square brackets are handled before
    parentheses. Finally every newline becomes ``<br>``.
    """
    substitutions = (
        (r'\[([^\[\]]+)\]', r'<span class="highlight-bracket-green">\1</span>'),
        (r'\(([^\(\)]+)\)', r'<span class="highlight-bracket">\1</span>'),
    )
    marked = text
    for pattern, replacement in substitutions:
        marked = re.sub(pattern, replacement, marked)
    return "<br>".join(marked.split("\n"))
56
 
57
  # —— 主处理函数 —— #
58
+ def process(image_pil):
59
+ # 1. PIL 转 bytes
60
+ buf = BytesIO()
61
+ image_pil.save(buf, format="PNG")
62
+ image_bytes = buf.getvalue()
63
+
64
+ # 2. OCR
65
  token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
66
  words = ocr_image(image_bytes, token)
67
  essay_text = "\n".join([w["words"] for w in words])
68
 
69
+ # 3. 格式化原文
70
  fmt_prompt = (
71
  "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
72
  "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
 
78
  )
79
  revised = fm.choices[0].message.content
80
 
81
+ # 4. 批改
82
  corr_prompt = (
83
  "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
84
  "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
 
89
  )
90
  corrected = cm.choices[0].message.content
91
 
92
+ # 5. 批改意见
93
  review_prompt = (
94
+ "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n" + corrected
 
95
  )
96
  rm = openai.ChatCompletion.create(
97
  model="gpt-4o-mini",
 
99
  )
100
  review = rm.choices[0].message.content
101
 
102
+ # 6. 评分
103
  rate_prompt = (
104
+ "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n" + revised
 
105
  )
106
  rr = openai.ChatCompletion.create(
107
  model="gpt-4o-mini",
 
109
  )
110
  rating = rr.choices[0].message.content
111
 
112
+ # 7. 优秀范文
113
  rewrite_prompt = (
114
  "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
115
  )
 
119
  )
120
  perfect = wm.choices[0].message.content
121
 
122
+ # 8. 渲染 HTML
123
  code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
124
+ tpl_path = os.path.join("app", "templates", "base.html")
125
+ with open(tpl_path, encoding="utf-8") as f:
126
  tpl = Template(f.read())
127
+
128
  html_content = (
129
  "<h2>原文格式化</h2>"
130
  + markdown.markdown(revised)
 
139
  )
140
  full_html = tpl.render(code=code, content=html_content)
141
 
142
+ # 9. 写文件并返回
143
+ output_dir = os.path.join("app", "output")
144
+ os.makedirs(output_dir, exist_ok=True)
145
+ html_path = os.path.join(output_dir, f"{code}.html")
146
+ pdf_path = os.path.join(output_dir, f"{code}.pdf")
147
+
148
  with open(html_path, "w", encoding="utf-8") as f:
149
  f.write(full_html)
150
+
151
+ pdfkit.from_string(full_html, pdf_path,
152
+ options={"enable-local-file-access": ""})
153
 
154
  return full_html, html_path, pdf_path
155
 
156
  # —— Gradio 接口 —— #
157
  with gr.Blocks(title="英语作文批改") as demo:
158
  gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
159
+ image_in = gr.Image(type="pil", label="上传照片")
160
  output_html = gr.HTML()
161
  btn = gr.Button("开始批改")
162
  file_html = gr.File(label="下载 HTML")
163
  file_pdf = gr.File(label="下载 PDF")
164
+
165
+ btn.click(
166
+ fn=process,
167
+ inputs=image_in,
168
+ outputs=[output_html, file_html, file_pdf]
169
+ )
170
 
171
  if __name__ == "__main__":
172
  demo.launch(server_name="0.0.0.0", server_port=7860)