Spaces:
Sleeping
Sleeping
genai model
Browse files- app.py +76 -2
- requirements.txt +2 -1
app.py
CHANGED
@@ -19,6 +19,11 @@ from google.oauth2.service_account import Credentials
|
|
19 |
import vertexai
|
20 |
from vertexai.generative_models import GenerativeModel
|
21 |
|
|
|
|
|
|
|
|
|
|
|
22 |
# From other files
|
23 |
from storage_service import GoogleCloudStorage
|
24 |
from assignment_ui import create_assignment_ui
|
@@ -69,6 +74,13 @@ vertexai.init(
|
|
69 |
credentials=google_creds,
|
70 |
)
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
_AssignmentService = AssignmentService(GCS_SERVICE)
|
73 |
_SubmissionService = SubmissionService(GCS_SERVICE)
|
74 |
_DashboardService = DashboardService(_AssignmentService, _SubmissionService)
|
@@ -2272,7 +2284,69 @@ def download_content(content):
|
|
2272 |
# OCR
|
2273 |
def extract_text_from_image(image):
|
2274 |
"""從上傳的圖片中提取文字"""
|
2275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2276 |
|
2277 |
# 使用 OCR 工具
|
2278 |
def extract_text_from_file(file):
|
@@ -3947,7 +4021,7 @@ with gr.Blocks(theme=THEME, css=CSS) as demo:
|
|
3947 |
with gr.Column():
|
3948 |
chinese_full_paragraph_input = gr.Textbox(label="輸入段落全文", lines=5)
|
3949 |
with gr.Row():
|
3950 |
-
with gr.Column(scale=1):
|
3951 |
# 修正文件類型設置
|
3952 |
chinese_file_upload = gr.File(
|
3953 |
label="上傳文件",
|
|
|
19 |
import vertexai
|
20 |
from vertexai.generative_models import GenerativeModel
|
21 |
|
22 |
+
from google import genai
|
23 |
+
from google.genai import types
|
24 |
+
import base64
|
25 |
+
import io
|
26 |
+
|
27 |
# From other files
|
28 |
from storage_service import GoogleCloudStorage
|
29 |
from assignment_ui import create_assignment_ui
|
|
|
74 |
credentials=google_creds,
|
75 |
)
|
76 |
|
77 |
+
# Module-level google-genai client. It is configured to go through Vertex AI
# (vertexai=True) and authenticates with the google_creds credentials object.
GENAI_CLIENT = genai.Client(
    vertexai=True,
    project="junyiacademy",
    location="us-central1",
    credentials=google_creds,
)
|
83 |
+
|
84 |
_AssignmentService = AssignmentService(GCS_SERVICE)
|
85 |
_SubmissionService = SubmissionService(GCS_SERVICE)
|
86 |
_DashboardService = DashboardService(_AssignmentService, _SubmissionService)
|
|
|
2284 |
# OCR
|
2285 |
def extract_text_from_image(image):
    """Extract the text shown in an uploaded image using a Gemini model.

    The image is re-encoded as JPEG in memory and sent, together with a fixed
    instruction prompt, through the module-level ``GENAI_CLIENT``.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
            It must provide ``save(fp, format=...)``; presumably a
            ``PIL.Image.Image`` -- confirm against the UI component that
            supplies it.

    Returns:
        The text returned by the model, or ``""`` when no image was given or
        the response carries no text.
    """
    # Nothing uploaded: nothing to recognise.
    if image is None:
        return ""

    # The JPEG encoder cannot write every image mode (e.g. RGBA or palette
    # images, which are typical for PNG screenshots) and raises on them, so
    # fall back to plain RGB first. Note that this drops any alpha channel.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    mode = getattr(image, "mode", None)
    if mode is not None and mode not in jpeg_writable_modes:
        image = image.convert("RGB")

    # Encode the image as JPEG in memory; the raw bytes are passed to the API
    # directly (no base64 round trip is needed).
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    image_part = types.Part.from_bytes(
        data=buffered.getvalue(),
        mime_type="image/jpeg",
    )

    # Model and request content: the image followed by the instruction text.
    model = "gemini-2.0-pro-exp-02-05"
    contents = [
        types.Content(
            role="user",
            parts=[
                image_part,
                types.Part.from_text(text="verify the text and return the text only. do not return any other text and check the text is correct or not."),
            ],
        )
    ]

    # Generation settings; every listed safety category has its filter set to
    # "OFF".
    # NOTE(review): temperature=1 is kept from the original; a lower value may
    # give more repeatable transcriptions -- confirm before changing.
    harm_categories = (
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_HARASSMENT",
    )
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        max_output_tokens=8192,
        response_modalities=["TEXT"],
        safety_settings=[
            types.SafetySetting(category=category, threshold="OFF")
            for category in harm_categories
        ],
    )

    # Ask the model for the text.
    response = GENAI_CLIENT.models.generate_content(
        model=model,
        contents=contents,
        config=generate_content_config,
    )

    # response.text may be None when the model produced no text part; keep the
    # return type a string, matching the "no image" case above.
    return response.text or ""
|
2350 |
|
2351 |
# 使用 OCR 工具
|
2352 |
def extract_text_from_file(file):
|
|
|
4021 |
with gr.Column():
|
4022 |
chinese_full_paragraph_input = gr.Textbox(label="輸入段落全文", lines=5)
|
4023 |
with gr.Row():
|
4024 |
+
with gr.Column(scale=1, visible=False):
|
4025 |
# 修正文件類型設置
|
4026 |
chinese_file_upload = gr.File(
|
4027 |
label="上傳文件",
|
requirements.txt
CHANGED
@@ -6,4 +6,5 @@ google-auth-httplib2
|
|
6 |
google-auth-oauthlib
|
7 |
google-cloud-storage
|
8 |
google-cloud-bigquery
|
9 |
-
vertexai
|
|
|
|
6 |
google-auth-oauthlib
|
7 |
google-cloud-storage
|
8 |
google-cloud-bigquery
|
9 |
+
vertexai
|
10 |
+
google-genai
|