Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# app.py
|
2 |
+
import gradio as gr
|
3 |
+
import requests
|
4 |
+
import base64
|
5 |
+
import os
|
6 |
+
import openai
|
7 |
+
from datetime import datetime
|
8 |
+
import random
|
9 |
+
import re
|
10 |
+
import markdown
|
11 |
+
|
12 |
+
# API credentials, read from environment variables
# (configured through Hugging Face Space Secrets).
API_KEY = os.environ.get("BAIDU_API_KEY")
SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
16 |
+
|
17 |
+
# Baidu OCR API helpers
def get_access_token(api_key, secret_key, timeout=10):
    """Request an OAuth access token from the Baidu token endpoint.

    Args:
        api_key: Sent as ``client_id``.
        secret_key: Sent as ``client_secret``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``access_token`` field of the JSON response, or ``None`` when
        the response does not contain one.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": api_key,
        "client_secret": secret_key,
    }
    # Without an explicit timeout a stalled connection would block forever.
    response = requests.post(url, params=params, timeout=timeout)
    return response.json().get("access_token")
|
27 |
+
|
28 |
+
def ocr_image(image_path, access_token, timeout=30):
    """Send an image file to the Baidu handwriting OCR endpoint.

    Args:
        image_path: Path of the image file; it is read and Base64-encoded.
        access_token: Token sent as the ``access_token`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the response.
    """
    with open(image_path, "rb") as f:
        img_data = base64.b64encode(f.read()).decode()

    url = "https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {
        "image": img_data,
        "language_type": "ENG",
    }
    response = requests.post(
        url,
        # Passed via ``params`` so the token is URL-encoded by requests
        # instead of being interpolated into the URL by hand.
        params={"access_token": access_token},
        headers=headers,
        data=data,
        # Without an explicit timeout a stalled connection would block forever.
        timeout=timeout,
    )
    return response.json()
|
40 |
+
|
41 |
+
# GPT helper
def process_with_gpt(prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    A new OpenAI client is built on every call using ``OPENAI_API_KEY``;
    the content of the first returned choice is handed back.
    """
    messages = [{"role": "user", "content": prompt}]
    gpt_client = openai.OpenAI(
        api_key=OPENAI_API_KEY,
        base_url="https://api.openai.com/v1",
    )
    reply = gpt_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        temperature=0.7,
    )
    first_choice = reply.choices[0]
    return first_choice.message.content
|
54 |
+
|
55 |
+
# Main processing pipeline

# Prompt prefixes; the essay text is appended to each one at call time.
_FORMAT_PROMPT = (
    "请帮我整理下面的英语作文文本格式,只整理英文正文部分(忽略英文正文外所有部分),"
    "保证原汁原味(明显错误空格换行、乱码、非常用字符比如☰需要改正除外),"
    "出现的拼写错误也不要帮助改正:\n\n"
)

_CORRECTION_PROMPT = (
    "请帮我把下面的英语作文的语法错误改正,输出改正后的文章(只改错误和不流畅之处),"
    "请参照下面的格式要求\n"
    "格式要求:原文修改部分用()括起来,修改的部分用[]括起来,修改单词括单词、"
    "修改短语括短语、修改句子括句子,括的部分精准一些,能反映问题\n"
    "示例:The (rabbish) [rubbish] thrown by visitors has piled up and its lush (verdure no longer flourish) [verdure no longer flourishes] as it (did once) [once did].\n\n"
    "需要批改的英语习作:\n"
)

_ASSESSMENT_PROMPT = """
下面是一份已经批改过的英语作文(改正了一些语法错误和不流畅不准确之处),其中小括号表示原文的错误,
中括号表示原文的修改部分。请你根据修改的批注,按照如下格式给出批改意见:
> 英文有错误的原文1
- 错误1和解决方案
- 错误2和解决方案,后面的以此类推
示例:
> The rabbish thrown by visitors has piled up and its lush verdure no longer flourish as it did once.
- rabbish:拼写错误
- verdure no longer flourish:动词单复数错误,其中verdure是单数,动词使用第三人称单数
- as it did once:语序错误

实际批改内容:
"""

_RATE_PROMPT = """
你是资深英语写作批改专家,按以下三个维度评分(F/B-/B/B+/A-/A/A+等级):
## 语言通顺度(Fluency)&可读性与风格(Readability & Style)
## 上下文连贯度(Coherence)
## 词汇多样性(Lexical Resource)&语法准确性(Grammatical Accuracy)
要求:1. 给出评分等级 2. 详细分析原因 3. 提出改进方向
示例格式:
## 语言通顺度 B+
- 优点说明...
- 缺点说明...
- 改进建议...

需要评分的作文:
"""

_REWRITE_PROMPT = """
请使用优秀的英语表达重写下面这篇英文习作,要展现优秀的词汇和语法,使用地道的表达方式,
使用多样化的句式、短语和词汇,加粗可供学习的部分。
原文:
"""

_THEME_PROMPT = """
请分析作文内容并给出主题相关的优秀表达:
1. 推测作文主题
2. 分析具体内容
3. 提供相关优秀表达(包含句子、句型、短语、词汇)

需要分析的作文:
"""

# Heading after which the "recommended expressions" part of the reply starts,
# when the model happens to emit it.
_THEME_MARKER = "## 可用表达"


def _read_upload(image):
    """Return the raw bytes of an upload given as bytes, a path, or a file-like object."""
    if isinstance(image, (bytes, bytearray)):
        return bytes(image)

    # NOTE(review): some Gradio releases appear to hand over a temp-file
    # wrapper whose ``.name`` is the real upload path while the handle itself
    # is empty -- TODO confirm. Preferring the path covers that case as well
    # as ordinary open files.
    if isinstance(image, (str, os.PathLike)):
        path = image
    else:
        path = getattr(image, "name", None)
    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        with open(path, "rb") as fh:
            return fh.read()

    if hasattr(image, "read"):
        return image.read()
    raise TypeError(f"unsupported upload object: {type(image).__name__}")


def _highlight_brackets(text):
    """Turn ``[fix]`` into green spans and ``(error)`` into red spans, as HTML."""
    from html import escape  # local import: only this helper needs it

    # The text originates from OCR / model output, so escape it before it is
    # embedded in HTML; brackets and parentheses are unaffected by escaping.
    text = escape(text, quote=False)
    text = re.sub(r'\[([^\[\]]+)\]', r'<span style="background:#d4f7d4">\1</span>', text)
    text = re.sub(r'\(([^\(\)]+)\)', r'<span style="background:#ffd6d6">\1</span>', text)
    return text.replace('\n', '<br>')


def _render_report(code, img_b64, corrected_text, assessment_text,
                   rate_text, perfect_text, theme_text):
    """Assemble the final HTML report from the image and the generated texts."""
    img_html = f'<img src="data:image/jpeg;base64,{img_b64}" style="width:100%;max-height:600px;object-fit:contain;margin:20px 0">'

    # NOTE(review): markdown.markdown() receives model output verbatim;
    # presumably raw HTML in it is passed through -- confirm and sanitize
    # if that matters for this deployment.
    return f"""
    <html>
    <style>
    body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }}
    .section {{ margin-bottom: 30px; }}
    code {{ background: #f5f5f5; padding: 2px 4px; border-radius: 4px; }}
    pre {{ background: #f5f5f5; padding: 10px; overflow-x: auto; }}
    blockquote {{ border-left: 4px solid #ddd; padding-left: 12px; color: #666; }}
    </style>
    <body>
    <h1>作文批改结果 #{code}</h1>
    {img_html}

    <div class="section">
    <h2>批改结果</h2>
    <div style="font-family:Consolas,monospace;">{_highlight_brackets(corrected_text)}</div>
    </div>

    <div class="section">
    <h2>批改意见</h2>
    <div style="font-family:Consolas,monospace;">{markdown.markdown(assessment_text)}</div>
    </div>

    <div class="section">
    <h2>评分分析</h2>
    <div style="font-family:Consolas,monospace;">{markdown.markdown(rate_text)}</div>
    </div>

    <div class="section">
    <h2>优秀范文</h2>
    <div style="font-family:Consolas,monospace;">{markdown.markdown(perfect_text)}</div>
    </div>

    <div class="section">
    <h2>推荐表达</h2>
    <div style="font-family:Consolas,monospace;">{markdown.markdown(theme_text)}</div>
    </div>
    </body>
    </html>
    """


def process_essay(image):
    """Run OCR on an uploaded essay image and build an HTML correction report.

    Steps: OCR the image, let GPT clean up the recognised text, mark grammar
    corrections, write feedback on them, grade the essay, rewrite it as a
    model essay and suggest topic-related expressions; everything is then
    rendered into a single HTML page.

    Args:
        image: The upload, as raw bytes, a filesystem path, or a file-like
            object. ``None`` (nothing uploaded) is accepted.

    Returns:
        The HTML report as a string, or a short plain-text message when no
        image was supplied or OCR produced no text.
    """
    if image is None:
        return "请先上传作文图片"

    import tempfile  # local import: only needed for the scratch file below

    image_bytes = _read_upload(image)
    if not image_bytes:
        return "请先上传作文图片"

    # Four-digit id shown in the report heading.
    code = f"{random.randint(0, 9999):04}"

    # mkstemp guarantees a unique file, so concurrent requests that draw the
    # same code cannot overwrite or delete each other's image.
    fd, temp_path = tempfile.mkstemp(prefix=f"temp_{code}_", suffix=".jpg")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(image_bytes)

        # OCR
        access_token = get_access_token(API_KEY, SECRET_KEY)
        result = ocr_image(temp_path, access_token)

        # A missing or empty result means there is nothing to correct.
        if "words_result" not in result or not result["words_result"]:
            return "OCR 识别失败,请检查图片内容"

        essay_text = "\n".join(item["words"] for item in result["words_result"])

        # Clean up the formatting of the recognised text
        revised_text = process_with_gpt(_FORMAT_PROMPT + essay_text)

        # Grammar correction with (original) [fixed] markup
        corrected_text = process_with_gpt(_CORRECTION_PROMPT + revised_text)

        # Feedback on each correction
        assessment_text = process_with_gpt(_ASSESSMENT_PROMPT + corrected_text)

        # Grading
        rate_text = process_with_gpt(_RATE_PROMPT + revised_text)

        # Model essay
        perfect_text = process_with_gpt(_REWRITE_PROMPT + revised_text)

        # Recommended expressions: keep only the part after the marker when
        # the model emitted it; otherwise keep the whole reply, because the
        # prompt does not ask for that heading and dropping the reply would
        # leave the section empty.
        theme_text = process_with_gpt(_THEME_PROMPT + revised_text)
        if _THEME_MARKER in theme_text:
            theme_text = theme_text.split(_THEME_MARKER, 1)[1]

        img_b64 = base64.b64encode(image_bytes).decode()
        return _render_report(
            code,
            img_b64,
            corrected_text,
            assessment_text,
            rate_text,
            perfect_text,
            theme_text,
        )
    finally:
        # Remove the scratch file whether or not processing succeeded.
        if os.path.exists(temp_path):
            os.remove(temp_path)
|
213 |
+
|
214 |
+
# Build the Gradio interface
def create_interface():
    """Build and return the ``gr.Interface`` for the essay correction app.

    The interface feeds a single uploaded file to ``process_essay`` and
    shows the returned string in an HTML output component.
    """
    description = (
        "\n"
        "英语作文批改助手\n"
        "上传一张包含英语作文的手写图片,系统将自动进行文字识别并提供详细的批改建议、评分和优秀范文。\n"
    )

    # NOTE(review): ``type="file"` may not be accepted by every Gradio
    # release -- confirm against the version this Space pins.
    upload_input = gr.File(label="上传作文图片(JPG/PNG)", type="file")
    report_output = gr.HTML(label="批改结果")

    return gr.Interface(
        fn=process_essay,
        inputs=upload_input,
        outputs=report_output,
        title="英语作文智能批改",
        description=description,
        examples=[["example.jpg"]],
        cache_examples=False,
        theme="default",
    )
|
233 |
+
|
234 |
+
# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    app = create_interface()
    app.launch()
|