syurein committed
Commit 709c305 · Parent(s): 124b732
LLM_package.py CHANGED
@@ -38,7 +38,7 @@ class MoondreamInference:
                 obj["y_max"], obj["x_max"]
             ]
         })
-        print(parsed)
+
         return parsed
 
 
@@ -56,22 +56,19 @@ class GeminiInference:
         client = genai.Client(api_key=self.api_key_source)
         my_file = client.files.upload(file=file_path)
         response = client.models.generate_content(
-            model="gemini-2.0-flash",
+            model="gemini-2.5-pro",
             contents=[my_file, prompt],
         )
         return response.text
     def get_response_text(self, prompt):
         client = genai.Client(api_key=self.api_key_source)
         response = client.models.generate_content(
-            model="gemini-2.0-flash",
+            model="gemini-2.5-pro",
             contents=[prompt],
         )
         text = response.text
         return text
     def parse(self, text):
-        """
-        Parse the response JSON. Extract 'label' and 'box_2d' (normalized to [0, 1000]) and return a list with the boxes converted to [0, 1] normalization.
-        """
         json_str = text
         if '```json' in text:
             json_str = text[text.find('```json') + len('```json'):]
@@ -82,7 +79,9 @@ class GeminiInference:
         """
         Parse the response JSON. Extract 'label' and 'box_2d' (normalized to [0, 1000]) and return a list with the boxes converted to [0, 1] normalization.
         """
-        print(text)
+        print("GeminiInference.parse_response:", text)
+        if not text:
+            return {'state': 'empty'}
         json_str = text
         if '```json' in text:
             json_str = text[text.find('```json') + len('```json'):]
@@ -101,3 +100,41 @@ class GeminiInference:
             norm = [c / 1000.0 for c in coords]
             parsed.append({'label': obj['label'], 'box_2d': norm})
         return parsed
+class ObjectDetector:
+    def __init__(self, API_KEY=None):
+        self.model = GeminiInference(API_KEY)
+        self.prompt_objects = None
+        self.text = None
+
+    def detect_objects(self, image_path):
+        self.prompt = f"""
+        Detect all {self.prompt_objects} in the image. The box_2d should be [ymin, xmin, ymax, xmax] normalized to 0-1000.
+        Please provide the response as a JSON array of objects, where each object has a 'label' and 'box_2d' field.
+        Example:
+        [
+            {{"label": "face", "box_2d": [100, 200, 300, 400]}},
+            {{"label": "license_plate", "box_2d": [500, 600, 700, 800]}}
+        ]
+        """
+        print(self.prompt)
+        detected_objects_norm_0_1 = self.model.parse_response(self.model.get_response(image_path, self.prompt))
+        return detected_objects_norm_0_1
+    """
+    Detects the danger level of the image.
+    """
+    def detect_danger_level(self, image_path):
+        analysis_prompt = f"""
+        Analyze the image for personal-information leakage risk and respond strictly in JSON format. If reference material is provided, consult it:
+        {{
+            "risk_level": "high|medium|low",
+            "risk_reason": "Concrete reason for the risk",
+            "objects_to_remove": ["List of objects to erase (in English, e.g. 'face', 'license_plate')"]
+        }}
+        <reference>
+        {self.text if self.text else "None"}
+        </reference>
+        """
+
+        response = self.model.parse(self.model.get_response(image_path, analysis_prompt))
+        print(f"Response: {response}")
+        return response
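A quick usage sketch for the ObjectDetector class added above (not part of the commit; GEMINI_API_KEY and sample.jpg are placeholder assumptions):

import os
from LLM_package import ObjectDetector

detector = ObjectDetector(API_KEY=os.getenv('GEMINI_API_KEY'))  # assumed env var
detector.prompt_objects = {'face', 'license_plate'}  # categories to look for
boxes = detector.detect_objects('sample.jpg')  # placeholder image path
# Each entry is {'label': ..., 'box_2d': [ymin, xmin, ymax, xmax]} normalized to [0, 1]
for box in boxes:
    print(box['label'], box['box_2d'])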
__pycache__/LLM_package.cpython-312.pyc ADDED
Binary file (7.76 kB)
 
__pycache__/detector.cpython-312.pyc ADDED
Binary file (2.6 kB)
 
app.py CHANGED
@@ -17,6 +17,7 @@ import supervision as sv
 from PIL import Image, ImageFilter
 import numpy as np
 import cv2
+from LLM_package import ObjectDetector, GeminiInference
 import pycocotools.mask as mask_util
 import insightface
 from fastapi import FastAPI, File, UploadFile, Form
@@ -227,10 +228,39 @@ import easyocr
 
 
 
-def llm_to_process_image(risk_level, image_path, point1, point2, thresholds=None):
+def llm_to_process_image_simple(risk_level, image_path, point1, point2, thresholds=None):
+    print(risk_level, image_path, point1, point2, thresholds)
     print('point1,point2', point1, point2)
+    GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
     # Add the image-processing logic here
-    pass
+    Objectdetector = ObjectDetector(API_KEY=GEMINI_API_KEY)
+    debug_image_path = '/test_llm.jpg'
+    Objectdetector.prompt_objects = {'text', 'poster', 'Name tag', 'License plate', 'Digital screens',
+                                     'signboard', 'sign', 'logo', 'manhole', 'electricity pole', 'cardboard'}
+    # Load the image and convert to RGB
+
+    image = cv2.imread(image_path)
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    mask_llm = np.zeros(image.shape[:2], dtype=np.uint8)
+    llm_results = Objectdetector.detect_objects(image_path)
+    for result in llm_results:
+        bbox = result['box_2d']
+        x1, y1 = int(bbox[1] * image.shape[1]), int(bbox[0] * image.shape[0])
+        x2, y2 = int(bbox[3] * image.shape[1]), int(bbox[2] * image.shape[0])
+        mask_llm[y1:y2, x1:x2] = 255  # mask the detected region
+    p1_x, p1_y = int(point1[0] * image.shape[1]), int(point1[1] * image.shape[0])
+    p2_x, p2_y = int(point2[0] * image.shape[1]), int(point2[1] * image.shape[0])
+    x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
+    x_max, y_max = min(image.shape[1], max(p1_x, p2_x)), min(image.shape[0], max(p1_y, p2_y))
+    mask_llm[y_min:y_max, x_min:x_max] = 0  # set the protected region to black
+    save_dir = "./saved_images"
+    os.makedirs(save_dir, exist_ok=True)
+    debug_image_pil = Image.fromarray(mask_llm)
+    debug_image_pil.save(save_dir + debug_image_path)
+    return save_dir + debug_image_path
+
+
+
 
 # Function that performs the special processing
 def special_process_image_yolo(risk_level, image_path, point1, point2, thresholds=None):
@@ -838,6 +868,91 @@ async def mosaic_face(file: UploadFile = File(...)):
     # Return the temporary file as the response
     return FileResponse(path=temp_file_path, media_type="image/jpeg", filename="mosaic_image.jpg")
 
+
+
+
+
+
+
+
+@app.post("/create-mask-and-inpaint-sum-llm-simple")
+async def create_mask_sum(image: UploadFile = File(...), risk_level: int = Form(...),
+                          x1: float = Form(...),
+                          y1: float = Form(...),
+                          x2: float = Form(...),
+                          y2: float = Form(...),):
+    default_x = 0.001
+    default_y = 0.001
+
+
+    point1 = [default_x if math.isnan(x1) else x1, default_y if math.isnan(y1) else y1]
+
+    point2 = [default_x if math.isnan(x2) else x2, default_y if math.isnan(y2) else y2]
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    # Generate a unique identifier
+    unique_id = uuid.uuid4().hex
+    input_path = save_image(image.file, f"./input_{timestamp}_{unique_id}.jpg")
+    mask_path = llm_to_process_image_simple(risk_level, input_path, point1, point2, thresholds=thresholds)
+    output_path = f"./output_simple_lama_{timestamp}_{unique_id}.jpg"
+    print('point1,point2', point1, point2)  # the region that must not be erased
+    # Inpaint with OpenCV
+    inpaint_image_with_mask1(input_path, mask_path, output_path)
+
+    return FileResponse(output_path)
+
+# Load the cascade file (face detection)
+face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+
+def apply_mosaic(image, x, y, w, h, mosaic_level=15):
+    """Apply a mosaic to the specified region"""
+    face = image[y:y+h, x:x+w]
+    face = cv2.resize(face, (w // mosaic_level, h // mosaic_level))
+    face = cv2.resize(face, (w, h), interpolation=cv2.INTER_NEAREST)
+    image[y:y+h, x:x+w] = face
+    return image
+
+@app.post("/mosaic_face")
+async def mosaic_face(file: UploadFile = File(...)):
+    # Read the image file
+    image_data = await file.read()
+    np_array = np.frombuffer(image_data, np.uint8)
+    img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
+
+    # Convert to grayscale and detect faces
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(30, 30))
+
+    # Apply a mosaic to each detected face
+    for (x, y, w, h) in faces:
+        img = apply_mosaic(img, x, y, w, h)
+
+    # Save to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        temp_file_path = Path(temp_file.name)
+        cv2.imwrite(str(temp_file_path), img)
+
+    # Return the temporary file as the response
+    return FileResponse(path=temp_file_path, media_type="image/jpeg", filename="mosaic_image.jpg")
+
 
 # Helper function to read image file
 def read_image(file: UploadFile):
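A minimal client-side sketch for the new /create-mask-and-inpaint-sum-llm-simple endpoint (assuming the app is served at localhost:8000; photo.jpg and the coordinate values are placeholders):

import requests

with open('photo.jpg', 'rb') as f:  # placeholder input image
    resp = requests.post(
        'http://localhost:8000/create-mask-and-inpaint-sum-llm-simple',  # assumed host/port
        files={'image': ('photo.jpg', f, 'image/jpeg')},
        data={'risk_level': 2, 'x1': 0.1, 'y1': 0.1, 'x2': 0.4, 'y2': 0.4},  # [0, 1] coords of the keep-out box
    )
with open('inpainted.jpg', 'wb') as out:
    out.write(resp.content)  # JPEG returned via FileResponse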
detector.py CHANGED
@@ -16,21 +16,24 @@ class ObjectDetector:
     ]
     """
     def detect_objects(self, image_path):
-        detected_objects_norm_0_1= self.model.parse_response(self.model.get_response(image_path, self.prompt)) return detected_objects_norm_0_1
+        detected_objects_norm_0_1 = self.model.parse_response(self.model.get_response(image_path, self.prompt))
+        return detected_objects_norm_0_1
+    """
+    Detects the danger level of the image.
+    """
     def detect_danger_level(self, image_path):
-        """
-        Detects the danger level of the image.
-        """
         analysis_prompt = f"""
         Analyze the image for personal-information leakage risk and respond strictly in JSON format. If reference material is provided, consult it:
-        {{
-        "risk_level": "high|medium|low",
-        "risk_reason": "Concrete reason for the risk",
-        "objects_to_remove": ["List of objects to erase (in English, e.g. 'face', 'license_plate')"]
-        }}
-        <reference>
-        {self.text if self.text else "None"}
-        </reference>
-        """
-        response = json.loads(self.model.get_response_text(image_path, analysis_prompt))
+        {{
+            "risk_level": "high|medium|low",
+            "risk_reason": "Concrete reason for the risk",
+            "objects_to_remove": ["List of objects to erase (in English, e.g. 'face', 'license_plate')"]
+        }}
+        <reference>
+        {self.text if self.text else "None"}
+        </reference>
+        """
+
+        response = self.model.parse(self.model.get_response(image_path, analysis_prompt))
+        print(f"Response: {response}")
         return response
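And a sketch of driving the fixed detect_danger_level (the key, reference text, and image path are placeholder assumptions):

import os
from LLM_package import ObjectDetector

detector = ObjectDetector(API_KEY=os.getenv('GEMINI_API_KEY'))
detector.text = 'Badge photos must not be published.'  # optional reference material
risk = detector.detect_danger_level('sample.jpg')  # placeholder image path
print(risk)  # per the prompt, expected: {'risk_level': ..., 'risk_reason': ..., 'objects_to_remove': [...]}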
requirements.txt CHANGED
@@ -72,4 +72,5 @@ uvicorn==0.32.0
 zipp==3.20.2
 supervision
 onnxruntime
-insightface
+
+dotenv
saved_images/test_llm.jpg ADDED
test.py ADDED
@@ -0,0 +1,39 @@
+from LLM_package import ObjectDetector
+import os
+from dotenv import load_dotenv
+import numpy as np
+import cv2
+from PIL import Image
+load_dotenv(dotenv_path='../.env')
+def llm_to_process_image(risk_level, image_path, point1, point2, thresholds=None):
+    print(risk_level, image_path, point1, point2, thresholds)
+    print('point1,point2', point1, point2)
+    GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
+    # Add the image-processing logic here
+    Objectdetector = ObjectDetector(API_KEY=GEMINI_API_KEY)
+    debug_image_path = '/test_llm.jpg'
+    Objectdetector.prompt_objects = {'face', 'poster', 'Name tag', 'License plate', 'Digital screens',
+                                     'signboard', 'sign', 'logo', 'manhole', 'electricity pole', 'cardboard'}
+    # Load the image and convert to RGB
+
+    image = cv2.imread(image_path)
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    mask_llm = np.zeros(image.shape[:2], dtype=np.uint8)
+    llm_results = Objectdetector.detect_objects(image_path)
+    for result in llm_results:
+        bbox = result['box_2d']
+        x1, y1 = int(bbox[1] * image.shape[1]), int(bbox[0] * image.shape[0])
+        x2, y2 = int(bbox[3] * image.shape[1]), int(bbox[2] * image.shape[0])
+        mask_llm[y1:y2, x1:x2] = 255  # mask the detected region
+    p1_x, p1_y = int(point1[0] * image.shape[1]), int(point1[1] * image.shape[0])
+    p2_x, p2_y = int(point2[0] * image.shape[1]), int(point2[1] * image.shape[0])
+    x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
+    x_max, y_max = min(image.shape[1], max(p1_x, p2_x)), min(image.shape[0], max(p1_y, p2_y))
+    mask_llm[y_min:y_max, x_min:x_max] = 0  # set the protected region to black
+    save_dir = "./saved_images"
+    os.makedirs(save_dir, exist_ok=True)
+    debug_image_pil = Image.fromarray(mask_llm)
+    debug_image_pil.save(save_dir + debug_image_path)
+
+llm_to_process_image(50, "../../16508.jpg", (0, 0), (0, 0), thresholds=None)
+
test_llm.jpg ADDED