Spaces:
Running
Running
syurein
committed on
Commit
·
709c305
1
Parent(s):
124b732
修正
Browse files- LLM_package.py +44 -7
- __pycache__/LLM_package.cpython-312.pyc +0 -0
- __pycache__/detector.cpython-312.pyc +0 -0
- app.py +117 -2
- detector.py +17 -14
- requirements.txt +2 -1
- saved_images/test_llm.jpg +0 -0
- test.py +39 -0
- test_llm.jpg +0 -0
LLM_package.py
CHANGED
@@ -38,7 +38,7 @@ class MoondreamInference:
|
|
38 |
obj["y_max"], obj["x_max"]
|
39 |
]
|
40 |
})
|
41 |
-
|
42 |
return parsed
|
43 |
|
44 |
|
@@ -56,22 +56,19 @@ class GeminiInference:
|
|
56 |
client = genai.Client(api_key=self.api_key_source)
|
57 |
my_file = client.files.upload(file=file_path)
|
58 |
response = client.models.generate_content(
|
59 |
-
model="gemini-2.
|
60 |
contents=[my_file, prompt],
|
61 |
)
|
62 |
return response.text
|
63 |
def get_response_text(self,prompt):
|
64 |
client = genai.Client(api_key=self.api_key_source)
|
65 |
response = client.models.generate_content(
|
66 |
-
model="gemini-2.
|
67 |
contents=[prompt],
|
68 |
)
|
69 |
text = response.text
|
70 |
return text
|
71 |
def parse(self, text):
|
72 |
-
"""
|
73 |
-
レスポンス JSON をパース。'label' と 'box_2d'([0-1000]正規化) を取り出し、[0,1]正規化に変換して返すリスト。
|
74 |
-
"""
|
75 |
json_str = text
|
76 |
if '```json' in text:
|
77 |
json_str = text[text.find('```json') + len('```json'):]
|
@@ -82,7 +79,9 @@ class GeminiInference:
|
|
82 |
"""
|
83 |
レスポンス JSON をパース。'label' と 'box_2d'([0-1000]正規化) を取り出し、[0,1]正規化に変換して返すリスト。
|
84 |
"""
|
85 |
-
print(text)
|
|
|
|
|
86 |
json_str = text
|
87 |
if '```json' in text:
|
88 |
json_str = text[text.find('```json') + len('```json'):]
|
@@ -101,3 +100,41 @@ class GeminiInference:
|
|
101 |
norm = [c / 1000.0 for c in coords]
|
102 |
parsed.append({'label': obj['label'], 'box_2d': norm})
|
103 |
return parsed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
obj["y_max"], obj["x_max"]
|
39 |
]
|
40 |
})
|
41 |
+
|
42 |
return parsed
|
43 |
|
44 |
|
|
|
56 |
client = genai.Client(api_key=self.api_key_source)
|
57 |
my_file = client.files.upload(file=file_path)
|
58 |
response = client.models.generate_content(
|
59 |
+
model="gemini-2.5-pro",
|
60 |
contents=[my_file, prompt],
|
61 |
)
|
62 |
return response.text
|
63 |
def get_response_text(self, prompt):
    """Send a text-only prompt to Gemini and return the text of the reply."""
    gemini = genai.Client(api_key=self.api_key_source)
    reply = gemini.models.generate_content(
        model="gemini-2.5-pro",
        contents=[prompt],
    )
    return reply.text
|
71 |
def parse(self, text):
|
|
|
|
|
|
|
72 |
json_str = text
|
73 |
if '```json' in text:
|
74 |
json_str = text[text.find('```json') + len('```json'):]
|
|
|
79 |
"""
|
80 |
レスポンス JSON をパース。'label' と 'box_2d'([0-1000]正規化) を取り出し、[0,1]正規化に変換して返すリスト。
|
81 |
"""
|
82 |
+
print("GeminiInference.parse_response:", text)
|
83 |
+
if not text:
|
84 |
+
return {'state': 'empty'}
|
85 |
json_str = text
|
86 |
if '```json' in text:
|
87 |
json_str = text[text.find('```json') + len('```json'):]
|
|
|
100 |
norm = [c / 1000.0 for c in coords]
|
101 |
parsed.append({'label': obj['label'], 'box_2d': norm})
|
102 |
return parsed
|
103 |
+
class ObjectDetector:
    """Ask a Gemini model to locate objects in an image and to rate privacy risk.

    Callers configure the instance after construction by assigning
    ``prompt_objects`` (what to look for) and, optionally, ``text``
    (reference material embedded in the risk-analysis prompt).
    """

    def __init__(self, API_KEY=None):
        """Create the detector.

        Args:
            API_KEY: Key passed straight to ``GeminiInference``.
        """
        self.model = GeminiInference(API_KEY)
        # Names of the objects to detect: a string or an iterable of strings.
        self.prompt_objects = None
        # Optional reference material inserted into the risk-analysis prompt.
        self.text = None

    def _describe_targets(self):
        """Render ``prompt_objects`` as plain comma-separated text for the prompt."""
        targets = self.prompt_objects
        if not targets:
            # Nothing configured: ask for generic objects instead of "None".
            return "objects"
        if isinstance(targets, str):
            return targets
        names = [str(name) for name in targets]
        if isinstance(targets, (set, frozenset)):
            # Sets have no stable order; sort so the prompt is reproducible.
            names.sort()
        return ", ".join(names)

    def detect_objects(self, image_path):
        """Detect the configured objects in the image at ``image_path``.

        The generated prompt is stored on ``self.prompt`` and printed.

        Args:
            image_path: Path of the image handed to the model.

        Returns:
            Whatever ``self.model.parse_response`` returns for the model reply.
            NOTE(review): the parse_response hunk visible in this commit
            returns ``{'state': 'empty'}`` for an empty reply, so callers
            should not assume a list -- confirm against the full file.
        """
        self.prompt = f"""
        Detect all {self._describe_targets()} in the image. The box_2d should be [ymin, xmin, ymax, xmax] normalized to 0-1000.
        Please provide the response as a JSON array of objects, where each object has a 'label' and 'box_2d' field.
        Example:
        [
        {{"label": "face", "box_2d": [100, 200, 300, 400]}},
        {{"label": "license_plate", "box_2d": [500, 600, 700, 800]}}
        ]
        """
        print(self.prompt)
        raw_reply = self.model.get_response(image_path, self.prompt)
        return self.model.parse_response(raw_reply)

    def detect_danger_level(self, image_path):
        """Detect the danger level of the image.

        Sends a Japanese-language prompt asking for a JSON object with
        ``risk_level``, ``risk_reason`` and ``objects_to_remove``; ``self.text``
        is embedded as reference material when it is set.

        Args:
            image_path: Path of the image handed to the model.

        Returns:
            The result of ``self.model.parse`` applied to the model reply.
        """
        analysis_prompt = f"""
        画像の個人情報漏洩リスクを分析し、厳密にJSON形式で返答してください。なおこの時、資料があれば、資料を参考にしてください:
        {{
        "risk_level": "high|medium|low",
        "risk_reason": "リスクの具体的理由",
        "objects_to_remove": ["消去すべきオブジェクトリスト(英語で、例: 'face', 'license_plate')"]
        }}
        <資料>
        {self.text if self.text else "なし"}
        </資料>
        """

        response = self.model.parse(self.model.get_response(image_path, analysis_prompt))
        print(f"Response: {response}")
        return response
|
__pycache__/LLM_package.cpython-312.pyc
ADDED
Binary file (7.76 kB). View file
|
|
__pycache__/detector.cpython-312.pyc
ADDED
Binary file (2.6 kB). View file
|
|
app.py
CHANGED
@@ -17,6 +17,7 @@ import supervision as sv
|
|
17 |
from PIL import Image, ImageFilter
|
18 |
import numpy as np
|
19 |
import cv2
|
|
|
20 |
import pycocotools.mask as mask_util
|
21 |
import insightface
|
22 |
from fastapi import FastAPI, File, UploadFile, Form
|
@@ -227,10 +228,39 @@ import easyocr
|
|
227 |
|
228 |
|
229 |
|
230 |
-
def
|
|
|
231 |
print('point1,point2', point1, point2)
|
|
|
232 |
# 画像処理のロジックをここに追加
|
233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
|
235 |
# 特殊な処理を行う関数
|
236 |
def special_process_image_yolo(risk_level, image_path, point1, point2, thresholds=None):
|
@@ -838,6 +868,91 @@ async def mosaic_face(file: UploadFile = File(...)):
|
|
838 |
# 一時ファイルをレスポンスとして返す
|
839 |
return FileResponse(path=temp_file_path, media_type="image/jpeg", filename="mosaic_image.jpg")
|
840 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
841 |
|
842 |
# Helper function to read image file
|
843 |
def read_image(file: UploadFile):
|
|
|
17 |
from PIL import Image, ImageFilter
|
18 |
import numpy as np
|
19 |
import cv2
|
20 |
+
from LLM_package import ObjectDetector,GeminiInference
|
21 |
import pycocotools.mask as mask_util
|
22 |
import insightface
|
23 |
from fastapi import FastAPI, File, UploadFile, Form
|
|
|
228 |
|
229 |
|
230 |
|
231 |
+
def llm_to_process_image_simple(risk_level, image_path, point1, point2, thresholds=None):
    """Build a binary mask from Gemini object detections and save it to disk.

    Detected boxes are painted white (255); the rectangle spanned by
    ``point1``/``point2`` is then painted black (0) so it is left untouched.

    Args:
        risk_level: Only logged by this function.
        image_path: Path of the image to analyse; must be readable by OpenCV.
        point1: ``(x, y)`` corner of the protected rectangle, as fractions of
            the image width/height.
        point2: Opposite ``(x, y)`` corner, same units as ``point1``.
        thresholds: Only logged by this function.

    Returns:
        Path of the saved mask image.

    Raises:
        ValueError: If ``image_path`` cannot be read as an image.
    """
    print(risk_level, image_path, point1, point2, thresholds)
    print('point1,point2', point1, point2)

    # Load first so an unreadable file fails before any model call is made.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]

    detector = ObjectDetector(API_KEY=os.getenv('GEMINI_API_KEY'))
    detector.prompt_objects = {'text', 'poster', 'Name tag', 'License plate', 'Digital screens',
                               'signboard', 'sign', 'logo', 'manhole', 'electricity pole', 'cardboard'}
    llm_results = detector.detect_objects(image_path)
    if not isinstance(llm_results, list):
        # The parser can hand back a status dict instead of detections
        # (e.g. for an empty reply); treat that as "nothing detected".
        llm_results = []

    mask_llm = np.zeros((height, width), dtype=np.uint8)
    for result in llm_results:
        bbox = result.get('box_2d') if isinstance(result, dict) else None
        if not bbox or len(bbox) != 4:
            continue
        # box_2d is [ymin, xmin, ymax, xmax] as fractions of the image size.
        top, left, bottom, right = bbox
        # Clamp and order the corners so a slightly-off box can neither wrap
        # around through negative indices nor produce an inverted slice.
        x_lo = min(max(int(min(left, right) * width), 0), width)
        x_hi = min(max(int(max(left, right) * width), 0), width)
        y_lo = min(max(int(min(top, bottom) * height), 0), height)
        y_hi = min(max(int(max(top, bottom) * height), 0), height)
        mask_llm[y_lo:y_hi, x_lo:x_hi] = 255  # mark the detected region

    # Clear the user-protected rectangle so it is never inpainted.
    p1_x, p1_y = int(point1[0] * width), int(point1[1] * height)
    p2_x, p2_y = int(point2[0] * width), int(point2[1] * height)
    x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
    x_max, y_max = min(width, max(p1_x, p2_x)), min(height, max(p1_y, p2_y))
    mask_llm[y_min:y_max, x_min:x_max] = 0

    save_dir = "./saved_images"
    os.makedirs(save_dir, exist_ok=True)
    # NOTE(review): the file name is fixed, so concurrent calls overwrite each
    # other's mask -- confirm whether a per-request name is needed.
    mask_path = save_dir + '/test_llm.jpg'
    Image.fromarray(mask_llm).save(mask_path)
    return mask_path
|
261 |
+
|
262 |
+
|
263 |
+
|
264 |
|
265 |
# 特殊な処理を行う関数
|
266 |
def special_process_image_yolo(risk_level, image_path, point1, point2, thresholds=None):
|
|
|
868 |
# 一時ファイルをレスポンスとして返す
|
869 |
return FileResponse(path=temp_file_path, media_type="image/jpeg", filename="mosaic_image.jpg")
|
870 |
|
871 |
+
|
872 |
+
|
873 |
+
|
874 |
+
|
875 |
+
|
876 |
+
|
877 |
+
|
878 |
+
@app.post("/create-mask-and-inpaint-sum-llm-simple")
async def create_mask_sum(image: UploadFile = File(...), risk_level: int = Form(...),
                          x1: float = Form(...),
                          y1: float = Form(...),
                          x2: float = Form(...),
                          y2: float = Form(...),):
    """Mask LLM-detected objects in an uploaded image and return the inpainted file.

    Args:
        image: Uploaded image; saved to a uniquely named file via ``save_image``.
        risk_level: Forwarded to ``llm_to_process_image_simple``.
        x1, y1: First corner of the region that must not be erased; NaN
            falls back to 0.001.
        x2, y2: Opposite corner of that region; NaN falls back to 0.001.

    Returns:
        A ``FileResponse`` for the path written by ``inpaint_image_with_mask1``.
    """
    default_x = 0.001
    default_y = 0.001

    # NaN form values are replaced by a small default coordinate.
    point1 = [default_x if math.isnan(x1) else x1, default_y if math.isnan(y1) else y1]
    point2 = [default_x if math.isnan(x2) else x2, default_y if math.isnan(y2) else y2]

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Generate a unique identifier so concurrent uploads get distinct file names.
    unique_id = uuid.uuid4().hex
    input_path = save_image(image.file, f"./input_{timestamp}_{unique_id}.jpg")

    # No `thresholds` is bound in this function, and the mask builder only
    # logs that argument, so it is left at its default instead of referencing
    # a name that may not exist.
    mask_path = llm_to_process_image_simple(risk_level, input_path, point1, point2)
    output_path = f"./output_simple_lama_{timestamp}_{unique_id}.jpg"
    print('point1,point2', point1, point2)  # the region that should not be erased

    # Inpaint the masked areas (the original comment says this uses OpenCV).
    inpaint_image_with_mask1(input_path, mask_path, output_path)

    return FileResponse(output_path)
|
902 |
+
|
903 |
+
# カスケードファイルの読み込み (顔検出)
|
904 |
+
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
|
905 |
+
|
906 |
+
def apply_mosaic(image, x, y, w, h, mosaic_level=15):
    """Pixelate the rectangle ``(x, y, w, h)`` of ``image`` in place.

    The region is shrunk by ``mosaic_level`` and scaled back up with
    nearest-neighbour interpolation.

    Args:
        image: Image array indexed as ``image[row, col]``; modified in place.
        x, y: Top-left corner of the rectangle.
        w, h: Width and height of the rectangle.
        mosaic_level: Downscale factor; larger values give coarser blocks.

    Returns:
        The same ``image`` object.
    """
    region = image[y:y+h, x:x+w]
    # Use the size actually sliced out: a rectangle that overhangs the image
    # edge yields a smaller region than (w, h).
    region_h, region_w = region.shape[:2]
    if region_h == 0 or region_w == 0:
        return image

    level = max(1, int(mosaic_level))
    # cv2.resize rejects a zero-sized target, which small regions would hit.
    small_size = (max(1, region_w // level), max(1, region_h // level))
    shrunk = cv2.resize(region, small_size)
    image[y:y+region_h, x:x+region_w] = cv2.resize(
        shrunk, (region_w, region_h), interpolation=cv2.INTER_NEAREST
    )
    return image
|
913 |
+
|
914 |
+
@app.post("/mosaic_face")
async def mosaic_face(file: UploadFile = File(...)):
    """Pixelate every detected face in the uploaded image and return it as JPEG.

    Args:
        file: Uploaded image file.

    Returns:
        A ``FileResponse`` serving the processed image as ``mosaic_image.jpg``.

    Raises:
        HTTPException: 400 if the upload is empty or cannot be decoded.
    """
    from fastapi import HTTPException

    # Read and decode the uploaded bytes.
    image_data = await file.read()
    np_array = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(np_array, cv2.IMREAD_COLOR) if np_array.size else None
    if img is None:
        # Reject bad uploads explicitly instead of failing inside cvtColor.
        raise HTTPException(status_code=400, detail="Uploaded file is not a decodable image.")

    # Convert to grayscale and detect faces.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(30, 30))

    # Apply the mosaic to each detected face.
    for (x, y, w, h) in faces:
        img = apply_mosaic(img, x, y, w, h)

    # Reserve a temporary file name; the image is written once the handle is
    # closed so the path can be reopened on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        temp_file_path = Path(temp_file.name)
    cv2.imwrite(str(temp_file_path), img)

    # NOTE(review): the temporary file is created with delete=False and is not
    # removed here -- confirm whether cleanup happens elsewhere.
    return FileResponse(path=temp_file_path, media_type="image/jpeg", filename="mosaic_image.jpg")
|
936 |
+
|
937 |
+
|
938 |
+
|
939 |
+
|
940 |
+
|
941 |
+
|
942 |
+
|
943 |
+
|
944 |
+
|
945 |
+
|
946 |
+
|
947 |
+
|
948 |
+
|
949 |
+
|
950 |
+
|
951 |
+
|
952 |
+
|
953 |
+
|
954 |
+
|
955 |
+
|
956 |
|
957 |
# Helper function to read image file
|
958 |
def read_image(file: UploadFile):
|
detector.py
CHANGED
@@ -16,21 +16,24 @@ class ObjectDetector:
|
|
16 |
]
|
17 |
"""
|
18 |
def detect_objects(self, image_path):
|
19 |
-
detected_objects_norm_0_1= self.model.parse_response(self.model.get_response(image_path, self.prompt))
|
|
|
|
|
|
|
|
|
20 |
def detect_danger_level(self, image_path):
|
21 |
-
"""
|
22 |
-
Detects the danger level of the image.
|
23 |
-
"""
|
24 |
analysis_prompt = f"""
|
25 |
画像の個人情報漏洩リスクを分析し、厳密にJSON形式で返答してください。なおこの時、資料があれば、資料を参考にしてください:
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
36 |
return response
|
|
|
16 |
]
|
17 |
"""
|
18 |
def detect_objects(self, image_path):
|
19 |
+
detected_objects_norm_0_1= self.model.parse_response(self.model.get_response(image_path, self.prompt))
|
20 |
+
return detected_objects_norm_0_1
|
21 |
+
"""
|
22 |
+
Detects the danger level of the image.
|
23 |
+
"""
|
24 |
def detect_danger_level(self, image_path):
|
|
|
|
|
|
|
25 |
analysis_prompt = f"""
|
26 |
画像の個人情報漏洩リスクを分析し、厳密にJSON形式で返答してください。なおこの時、資料があれば、資料を参考にしてください:
|
27 |
+
{{
|
28 |
+
"risk_level": "high|medium|low",
|
29 |
+
"risk_reason": "リスクの具体的理由",
|
30 |
+
"objects_to_remove": ["消去すべきオブジェクトリスト(英語で、例: 'face', 'license_plate')"]
|
31 |
+
}}
|
32 |
+
<資料>
|
33 |
+
{self.text if self.text else "なし"}
|
34 |
+
</資料>
|
35 |
+
"""
|
36 |
+
|
37 |
+
response = self.model.parse(self.model.get_response(image_path, analysis_prompt))
|
38 |
+
print(f"Response: {response}")
|
39 |
return response
|
requirements.txt
CHANGED
@@ -72,4 +72,5 @@ uvicorn==0.32.0
|
|
72 |
zipp==3.20.2
|
73 |
supervision
|
74 |
onnxruntime
|
75 |
-
|
|
|
|
72 |
zipp==3.20.2
|
73 |
supervision
|
74 |
onnxruntime
|
75 |
+
|
76 |
+
dotenv
|
saved_images/test_llm.jpg
ADDED
![]() |
test.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from LLM_package import ObjectDetector
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
load_dotenv(dotenv_path='../.env')
|
8 |
+
def llm_to_process_image(risk_level, image_path, point1, point2, thresholds=None):
    """Build a binary mask from Gemini object detections and save it to disk.

    Detected boxes are painted white (255); the rectangle spanned by
    ``point1``/``point2`` is then painted black (0).

    Args:
        risk_level: Only logged by this function.
        image_path: Path of the image to analyse; must be readable by OpenCV.
        point1: ``(x, y)`` corner of the protected rectangle, as fractions of
            the image width/height.
        point2: Opposite ``(x, y)`` corner, same units as ``point1``.
        thresholds: Only logged by this function.

    Returns:
        Path of the saved mask image.

    Raises:
        ValueError: If ``image_path`` cannot be read as an image.
    """
    print(risk_level, image_path, point1, point2, thresholds)
    print('point1,point2', point1, point2)

    # Load first so an unreadable file fails before any model call is made.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]

    detector = ObjectDetector(API_KEY=os.getenv('GEMINI_API_KEY'))
    detector.prompt_objects = {'face', 'poster', 'Name tag', 'License plate', 'Digital screens',
                               'signboard', 'sign', 'logo', 'manhole', 'electricity pole', 'cardboard'}
    llm_results = detector.detect_objects(image_path)
    if not isinstance(llm_results, list):
        # The parser can hand back a status dict instead of detections
        # (e.g. for an empty reply); treat that as "nothing detected".
        llm_results = []

    mask_llm = np.zeros((height, width), dtype=np.uint8)
    for result in llm_results:
        bbox = result.get('box_2d') if isinstance(result, dict) else None
        if not bbox or len(bbox) != 4:
            continue
        # box_2d is [ymin, xmin, ymax, xmax] as fractions of the image size.
        top, left, bottom, right = bbox
        # Clamp and order the corners so a slightly-off box can neither wrap
        # around through negative indices nor produce an inverted slice.
        x_lo = min(max(int(min(left, right) * width), 0), width)
        x_hi = min(max(int(max(left, right) * width), 0), width)
        y_lo = min(max(int(min(top, bottom) * height), 0), height)
        y_hi = min(max(int(max(top, bottom) * height), 0), height)
        mask_llm[y_lo:y_hi, x_lo:x_hi] = 255  # mark the detected region

    # Clear the protected rectangle.
    p1_x, p1_y = int(point1[0] * width), int(point1[1] * height)
    p2_x, p2_y = int(point2[0] * width), int(point2[1] * height)
    x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
    x_max, y_max = min(width, max(p1_x, p2_x)), min(height, max(p1_y, p2_y))
    mask_llm[y_min:y_max, x_min:x_max] = 0

    save_dir = "./saved_images"
    os.makedirs(save_dir, exist_ok=True)
    mask_path = save_dir + '/test_llm.jpg'
    Image.fromarray(mask_llm).save(mask_path)
    # Returned for parity with the app-side variant of this function.
    return mask_path
|
37 |
+
|
38 |
+
llm_to_process_image(50, "../../16508.jpg", (0, 0), (0, 0), thresholds=None)
|
39 |
+
|
test_llm.jpg
ADDED
![]() |