Spaces:
Running
Running
阳渠
committed on
Commit
·
1a06ab4
1
Parent(s):
9a3f6f1
Update
Browse files
- app.py +7 -4
- requirements.txt +8 -1
app.py
CHANGED
@@ -12,11 +12,12 @@ import gradio as gr
|
|
12 |
from datetime import datetime
|
13 |
from modelscope.pipelines import pipeline
|
14 |
from modelscope import snapshot_download
|
|
|
15 |
from PIL import Image, ImageDraw, ImageFont
|
16 |
|
17 |
from PCAgent.api import inference_chat
|
18 |
from PCAgent.icon_localization import det
|
19 |
-
from PCAgent.
|
20 |
from PCAgent.prompt_qwen import get_subtask_prompt as get_subtask_prompt
|
21 |
from PCAgent.chat import init_action_chat, init_memory_chat, add_response
|
22 |
from PCAgent.prompt_qwen import get_action_prompt, get_process_prompt, get_memory_prompt
|
@@ -26,8 +27,10 @@ vl_model_version = os.environ.get('vl_model_version')
|
|
26 |
llm_model_version = os.environ.get('llm_model_version')
|
27 |
API_url = os.environ.get('API_url')
|
28 |
token = os.environ.get('token')
|
29 |
-
os.environ["OCR_ACCESS_KEY_ID"] = os.environ.get('OCR_ACCESS_KEY_ID')
|
30 |
-
os.environ["OCR_ACCESS_KEY_SECRET"] = os.environ.get('OCR_ACCESS_KEY_SECRET')
|
|
|
|
|
31 |
tff_file = os.environ.get('tff_file')
|
32 |
radius = 100
|
33 |
|
@@ -127,7 +130,7 @@ def get_perception_infos(screenshot_file, screenshot_som_file, font_path):
|
|
127 |
|
128 |
for i, img in enumerate(img_list):
|
129 |
width, height = Image.open(img).size
|
130 |
-
sub_text, sub_coordinates = ocr(img) # for api
|
131 |
for coordinate in sub_coordinates:
|
132 |
coordinate[0] = int(max(0, img_x_list[i] + coordinate[0] - padding))
|
133 |
coordinate[2] = int(min(total_width, img_x_list[i] + coordinate[2] + padding))
|
|
|
12 |
from datetime import datetime
|
13 |
from modelscope.pipelines import pipeline
|
14 |
from modelscope import snapshot_download
|
15 |
+
from modelscope.utils.constant import Tasks
|
16 |
from PIL import Image, ImageDraw, ImageFont
|
17 |
|
18 |
from PCAgent.api import inference_chat
|
19 |
from PCAgent.icon_localization import det
|
20 |
+
from PCAgent.text_localization_old import ocr
|
21 |
from PCAgent.prompt_qwen import get_subtask_prompt as get_subtask_prompt
|
22 |
from PCAgent.chat import init_action_chat, init_memory_chat, add_response
|
23 |
from PCAgent.prompt_qwen import get_action_prompt, get_process_prompt, get_memory_prompt
|
|
|
27 |
llm_model_version = os.environ.get('llm_model_version')
|
28 |
API_url = os.environ.get('API_url')
|
29 |
token = os.environ.get('token')
|
30 |
+
# os.environ["OCR_ACCESS_KEY_ID"] = os.environ.get('OCR_ACCESS_KEY_ID')
|
31 |
+
# os.environ["OCR_ACCESS_KEY_SECRET"] = os.environ.get('OCR_ACCESS_KEY_SECRET')
|
32 |
+
ocr_detection = pipeline(Tasks.ocr_detection, model='damo/cv_resnet18_ocr-detection-line-level_damo')
|
33 |
+
ocr_recognition = pipeline(Tasks.ocr_recognition, model='damo/cv_convnextTiny_ocr-recognition-document_damo')
|
34 |
tff_file = os.environ.get('tff_file')
|
35 |
radius = 100
|
36 |
|
|
|
130 |
|
131 |
for i, img in enumerate(img_list):
|
132 |
width, height = Image.open(img).size
|
133 |
+
sub_text, sub_coordinates = ocr(img, ocr_detection, ocr_recognition) # for api
|
134 |
for coordinate in sub_coordinates:
|
135 |
coordinate[0] = int(max(0, img_x_list[i] + coordinate[0] - padding))
|
136 |
coordinate[2] = int(min(total_width, img_x_list[i] + coordinate[2] + padding))
|
requirements.txt
CHANGED
@@ -11,4 +11,11 @@ transformers
|
|
11 |
torchvision
|
12 |
pycocotools
|
13 |
timm
|
14 |
-
termcolor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
torchvision
|
12 |
pycocotools
|
13 |
timm
|
14 |
+
termcolor
|
15 |
+
TensorFlow==2.9.1
|
16 |
+
keras==2.9.0
|
17 |
+
SentencePiece
|
18 |
+
tf_slim
|
19 |
+
tf_keras==2.15.0
|
20 |
+
pyclipper
|
21 |
+
numpy==1.26.4
|