cacode committed on
Commit
70fc399
·
verified ·
1 Parent(s): a1d82a9

Update app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +42 -46
app/main.py CHANGED
@@ -10,13 +10,15 @@ from datetime import datetime
10
  from io import BytesIO
11
  from urllib.parse import quote_plus
12
  from jinja2 import Template
13
- import openai
14
 
15
  # —— 环境变量读取 —— #
16
  BAIDU_API_KEY = os.getenv("BAIDU_API_KEY", "")
17
  BAIDU_SECRET_KEY = os.getenv("BAIDU_SECRET_KEY", "")
18
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
19
- openai.api_key = OPENAI_API_KEY
 
 
20
 
21
  # —— Baidu OCR —— #
22
  def get_access_token(api_key, secret_key):
@@ -32,9 +34,8 @@ def get_access_token(api_key, secret_key):
32
  return resp.json().get("access_token")
33
 
34
  def ocr_image(image_bytes: bytes, token: str):
35
- # Base64 编码后 URL 编码
36
- img_b64 = base64.b64encode(image_bytes).decode()
37
- img_encoded = quote_plus(img_b64)
38
  resp = requests.post(
39
  f"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token={token}",
40
  headers={"Content-Type": "application/x-www-form-urlencoded"},
@@ -44,102 +45,95 @@ def ocr_image(image_bytes: bytes, token: str):
44
  }
45
  )
46
  resp.raise_for_status()
47
- data = resp.json()
48
- return data.get("words_result", [])
49
 
50
  # —— 文本高亮 —— #
51
  def highlight_brackets(text: str) -> str:
52
- # 中括号绿色;小括号红色
53
  text = re.sub(r'\[([^\[\]]+)\]', r'<span class="highlight-bracket-green">\1</span>', text)
54
  text = re.sub(r'\(([^\(\)]+)\)', r'<span class="highlight-bracket">\1</span>', text)
55
  return text.replace("\n", "<br>")
56
 
57
  # —— 主处理函数 —— #
58
  def process(image_pil):
59
- # 1. PIL 转 bytes
60
  buf = BytesIO()
61
  image_pil.save(buf, format="PNG")
62
  image_bytes = buf.getvalue()
63
 
64
- # 2. OCR
65
- token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
66
- words = ocr_image(image_bytes, token)
67
  essay_text = "\n".join([w["words"] for w in words])
68
 
69
- # 3. 格式化原文
70
  fmt_prompt = (
71
  "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
72
  "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
73
  "拼写错误保留:\n\n" + essay_text
74
  )
75
- fm = openai.ChatCompletion.create(
76
  model="gpt-4o-mini",
77
  messages=[{"role": "user", "content": fmt_prompt}]
78
  )
79
- revised = fm.choices[0].message.content
80
 
81
- # 4. 批改
82
  corr_prompt = (
83
  "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
84
  "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
85
  )
86
- cm = openai.ChatCompletion.create(
87
  model="gpt-4o-mini",
88
  messages=[{"role": "user", "content": corr_prompt}]
89
  )
90
- corrected = cm.choices[0].message.content
91
 
92
- # 5. 批改意见
93
  review_prompt = (
94
  "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n" + corrected
95
  )
96
- rm = openai.ChatCompletion.create(
97
  model="gpt-4o-mini",
98
  messages=[{"role": "user", "content": review_prompt}]
99
  )
100
- review = rm.choices[0].message.content
101
 
102
- # 6. 评分
103
  rate_prompt = (
104
  "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n" + revised
105
  )
106
- rr = openai.ChatCompletion.create(
107
  model="gpt-4o-mini",
108
  messages=[{"role": "user", "content": rate_prompt}]
109
  )
110
- rating = rr.choices[0].message.content
111
 
112
- # 7. 优秀范文
113
  rewrite_prompt = (
114
  "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
115
  )
116
- wm = openai.ChatCompletion.create(
117
  model="gpt-4o-mini",
118
  messages=[{"role": "user", "content": rewrite_prompt}]
119
  )
120
- perfect = wm.choices[0].message.content
121
 
122
- # 8. 渲染 HTML
123
- code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
124
  tpl_path = os.path.join("app", "templates", "base.html")
125
  with open(tpl_path, encoding="utf-8") as f:
126
  tpl = Template(f.read())
127
 
128
  html_content = (
129
- "<h2>原文格式化</h2>"
130
- + markdown.markdown(revised)
131
- + "<h2>批改结果</h2>"
132
- + highlight_brackets(corrected)
133
- + "<h2>批改意见</h2>"
134
- + markdown.markdown(review)
135
- + "<h2>评分</h2>"
136
- + markdown.markdown(rating)
137
- + "<h2>优秀范文</h2>"
138
- + markdown.markdown(perfect)
139
  )
140
  full_html = tpl.render(code=code, content=html_content)
141
 
142
- # 9. 写文件并返回
143
  output_dir = os.path.join("app", "output")
144
  os.makedirs(output_dir, exist_ok=True)
145
  html_path = os.path.join(output_dir, f"{code}.html")
@@ -148,19 +142,21 @@ def process(image_pil):
148
  with open(html_path, "w", encoding="utf-8") as f:
149
  f.write(full_html)
150
 
151
- pdfkit.from_string(full_html, pdf_path,
152
- options={"enable-local-file-access": ""})
 
 
153
 
154
  return full_html, html_path, pdf_path
155
 
156
  # —— Gradio 接口 —— #
157
  with gr.Blocks(title="英语作文批改") as demo:
158
  gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
159
- image_in = gr.Image(type="pil", label="上传照片")
160
  output_html = gr.HTML()
161
- btn = gr.Button("开始批改")
162
- file_html = gr.File(label="下载 HTML")
163
- file_pdf = gr.File(label="下载 PDF")
164
 
165
  btn.click(
166
  fn=process,
 
10
  from io import BytesIO
11
  from urllib.parse import quote_plus
12
  from jinja2 import Template
13
+ from openai import OpenAI
14
 
15
  # —— 环境变量读取 —— #
16
  BAIDU_API_KEY = os.getenv("BAIDU_API_KEY", "")
17
  BAIDU_SECRET_KEY = os.getenv("BAIDU_SECRET_KEY", "")
18
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
19
+
20
+ # —— 初始化 OpenAI v1 客户端 —— #
21
+ client = OpenAI(api_key=OPENAI_API_KEY)
22
 
23
  # —— Baidu OCR —— #
24
  def get_access_token(api_key, secret_key):
 
34
  return resp.json().get("access_token")
35
 
36
  def ocr_image(image_bytes: bytes, token: str):
37
+ img_b64 = base64.b64encode(image_bytes).decode()
38
+ img_encoded = quote_plus(img_b64) # URL 编码
 
39
  resp = requests.post(
40
  f"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token={token}",
41
  headers={"Content-Type": "application/x-www-form-urlencoded"},
 
45
  }
46
  )
47
  resp.raise_for_status()
48
+ return resp.json().get("words_result", [])
 
49
 
50
  # —— 文本高亮 —— #
51
  def highlight_brackets(text: str) -> str:
 
52
  text = re.sub(r'\[([^\[\]]+)\]', r'<span class="highlight-bracket-green">\1</span>', text)
53
  text = re.sub(r'\(([^\(\)]+)\)', r'<span class="highlight-bracket">\1</span>', text)
54
  return text.replace("\n", "<br>")
55
 
56
  # —— 主处理函数 —— #
57
  def process(image_pil):
58
+ # —— PIL 转 bytes —— #
59
  buf = BytesIO()
60
  image_pil.save(buf, format="PNG")
61
  image_bytes = buf.getvalue()
62
 
63
+ # —— OCR 提取原文 —— #
64
+ token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
65
+ words = ocr_image(image_bytes, token)
66
  essay_text = "\n".join([w["words"] for w in words])
67
 
68
+ # —— 1. 格式化原文 —— #
69
  fmt_prompt = (
70
  "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
71
  "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
72
  "拼写错误保留:\n\n" + essay_text
73
  )
74
+ fm_resp = client.chat.completions.create(
75
  model="gpt-4o-mini",
76
  messages=[{"role": "user", "content": fmt_prompt}]
77
  )
78
+ revised = fm_resp.choices[0].message.content
79
 
80
+ # —— 2. 批改 —— #
81
  corr_prompt = (
82
  "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
83
  "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
84
  )
85
+ cm_resp = client.chat.completions.create(
86
  model="gpt-4o-mini",
87
  messages=[{"role": "user", "content": corr_prompt}]
88
  )
89
+ corrected = cm_resp.choices[0].message.content
90
 
91
+ # —— 3. 批改意见 —— #
92
  review_prompt = (
93
  "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n" + corrected
94
  )
95
+ rm_resp = client.chat.completions.create(
96
  model="gpt-4o-mini",
97
  messages=[{"role": "user", "content": review_prompt}]
98
  )
99
+ review = rm_resp.choices[0].message.content
100
 
101
+ # —— 4. 评分 —— #
102
  rate_prompt = (
103
  "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n" + revised
104
  )
105
+ rr_resp = client.chat.completions.create(
106
  model="gpt-4o-mini",
107
  messages=[{"role": "user", "content": rate_prompt}]
108
  )
109
+ rating = rr_resp.choices[0].message.content
110
 
111
+ # —— 5. 优秀范文 —— #
112
  rewrite_prompt = (
113
  "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
114
  )
115
+ wm_resp = client.chat.completions.create(
116
  model="gpt-4o-mini",
117
  messages=[{"role": "user", "content": rewrite_prompt}]
118
  )
119
+ perfect = wm_resp.choices[0].message.content
120
 
121
+ # —— 6. 渲染 HTML —— #
122
+ code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
123
  tpl_path = os.path.join("app", "templates", "base.html")
124
  with open(tpl_path, encoding="utf-8") as f:
125
  tpl = Template(f.read())
126
 
127
  html_content = (
128
+ "<h2>原文格式化</h2>" + markdown.markdown(revised) +
129
+ "<h2>批改结果</h2>" + highlight_brackets(corrected) +
130
+ "<h2>批改意见</h2>" + markdown.markdown(review) +
131
+ "<h2>评分</h2>" + markdown.markdown(rating) +
132
+ "<h2>优秀范文</h2>" + markdown.markdown(perfect)
 
 
 
 
 
133
  )
134
  full_html = tpl.render(code=code, content=html_content)
135
 
136
+ # —— 7. 写文件并返回 —— #
137
  output_dir = os.path.join("app", "output")
138
  os.makedirs(output_dir, exist_ok=True)
139
  html_path = os.path.join(output_dir, f"{code}.html")
 
142
  with open(html_path, "w", encoding="utf-8") as f:
143
  f.write(full_html)
144
 
145
+ pdfkit.from_string(
146
+ full_html, pdf_path,
147
+ options={"enable-local-file-access": ""}
148
+ )
149
 
150
  return full_html, html_path, pdf_path
151
 
152
  # —— Gradio 接口 —— #
153
  with gr.Blocks(title="英语作文批改") as demo:
154
  gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
155
+ image_in = gr.Image(type="pil", label="上传照片")
156
  output_html = gr.HTML()
157
+ btn = gr.Button("开始批改")
158
+ file_html = gr.File(label="下载 HTML")
159
+ file_pdf = gr.File(label="下载 PDF")
160
 
161
  btn.click(
162
  fn=process,