Rishi Desai committed
Commit 34c6239 · Parent: 25777fe

cleanup; using HF_HOME

Files changed (5):
  1. .env +0 -0
  2. README.md +2 -11
  3. demo.py +1 -3
  4. helpers.py +0 -190
  5. install.py +1 -2
.env DELETED
File without changes
README.md CHANGED
@@ -64,21 +64,11 @@ Using the ComfyUI workflows is the fastest way to get started. Run `python run_c
 - `./workflows/FaceEmbedDist.json` for computing the face embedding distance
 
 
-<!-- ## Configuration
-
-Create a .env file in the project root directory with your API keys:
-```
-touch .env
-echo "FAL_API_KEY=your_fal_api_key_here" >> .env
-```
-
-The FAL API key is used for face upscaling during preprocessing. You can get one at [fal.ai](https://fal.ai/). -->
-
 ## Gradio Demo
 
 A simple web interface for the face enhancement workflow.
 
-1. Run `python gradio_demo.py`
+1. Run `python demo.py`
 
 2. Go to http://localhost:7860. You may need to enable port forwarding.
 
@@ -87,6 +77,7 @@ A simple web interface for the face enhancement workflow.
 - Gradio demo is faster than the script because models remain loaded in memory
 - All images are saved in `./ComfyUI/input/scratch/`
 - Temporary files are created during processing and cleaned up afterward
+- Face cropping and upscaling are not applied to the reference image; this will be added in an update.
 
 ### Troubleshooting

demo.py CHANGED
@@ -2,8 +2,6 @@ from install import install
 
 # Global variable to track if install() has been run
 INSTALLED = False
-
-# Check if install() has been run before calling it
 if not INSTALLED:
     install()
     INSTALLED = True
@@ -18,7 +16,7 @@ import pathlib
 import sys
 from main import process_face
 from PIL import Image
-PORT = 7860
+
 CACHE_DIR = "./cache"
 
 # Ensure cache directory exists
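
A side note on the removed PORT constant: Gradio defaults to serving on port 7860, which is why the README can still point at http://localhost:7860 once the constant is gone, and launch(share=True) is an alternative when SSH port forwarding is awkward. A minimal sketch, assuming a gr.Interface entry point; enhance() is a hypothetical stand-in for the repo's pipeline, not its actual code:

import gradio as gr

def enhance(image):
    # hypothetical stand-in for the face-enhancement pipeline (main.process_face)
    return image

demo = gr.Interface(fn=enhance, inputs=gr.Image(), outputs=gr.Image())
# server_port=7860 restates gradio's default; share=True would instead create
# a temporary public URL, avoiding the need for port forwarding
demo.launch(server_port=7860)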
helpers.py DELETED
@@ -1,190 +0,0 @@
-import os
-import torch
-import numpy as np
-from PIL import Image
-import sys
-import cv2
-import base64
-import aiohttp
-import fal_client
-sys.path.append('./ComfyUI_AutoCropFaces')
-from dotenv import load_dotenv
-load_dotenv()
-from Pytorch_Retinaface.pytorch_retinaface import Pytorch_RetinaFace
-from transformers import AutoProcessor, AutoModelForCausalLM
-from transformers import CLIPProcessor, CLIPModel
-import gc
-
-
-CACHE_DIR = '/workspace/huggingface_cache'
-
-os.environ["HF_HOME"] = CACHE_DIR
-os.makedirs(CACHE_DIR, exist_ok=True)
-
-device = "cuda"
-
-def clear_cuda_memory():
-    """Aggressively clear CUDA memory"""
-    gc.collect()
-    torch.cuda.empty_cache()
-    torch.cuda.synchronize()
-
-
-def load_vision_models():
-    print("Loading CLIP and Florence models...")
-    # Load CLIP
-    clip_model = CLIPModel.from_pretrained(
-        "openai/clip-vit-large-patch14",
-        cache_dir=CACHE_DIR
-    ).to(device)
-    clip_processor = CLIPProcessor.from_pretrained(
-        "openai/clip-vit-large-patch14",
-        cache_dir=CACHE_DIR
-    )
-
-    # Load Florence
-    florence_model = AutoModelForCausalLM.from_pretrained(
-        "microsoft/Florence-2-large",
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-        trust_remote_code=True,
-        cache_dir=CACHE_DIR
-    ).to(device)
-    florence_processor = AutoProcessor.from_pretrained(
-        "microsoft/Florence-2-large",
-        trust_remote_code=True,
-        cache_dir=CACHE_DIR
-    )
-
-    return {
-        'clip_model': clip_model,
-        'clip_processor': clip_processor,
-        'florence_model': florence_model,
-        'florence_processor': florence_processor,
-    }
-
-
-def generate_caption(image):
-    vision_models = load_vision_models()
-
-    # Ensure the image is a PIL Image
-    if not isinstance(image, Image.Image):
-        image = Image.fromarray(image)
-
-    # Convert the image to RGB if it has an alpha channel
-    if image.mode == 'RGBA':
-        image = image.convert('RGB')
-
-    prompt = "<DETAILED_CAPTION>"
-    inputs = vision_models['florence_processor'](
-        text=prompt,
-        images=image,
-        return_tensors="pt"
-    ).to(device, torch.float16 if torch.cuda.is_available() else torch.float32)
-
-    generated_ids = vision_models['florence_model'].generate(
-        input_ids=inputs["input_ids"],
-        pixel_values=inputs["pixel_values"],
-        max_new_tokens=1024,
-        num_beams=3,
-        do_sample=False
-    )
-    generated_text = vision_models['florence_processor'].batch_decode(generated_ids, skip_special_tokens=True)[0]
-    parsed_answer = vision_models['florence_processor'].post_process_generation(
-        generated_text, task="<DETAILED_CAPTION>",
-        image_size=(image.width, image.height)
-    )
-
-    clear_cuda_memory()
-    return parsed_answer['<DETAILED_CAPTION>']
-
-
-def crop_face(image_path, output_dir, output_name, scale_factor=4.0):
-    image = Image.open(image_path).convert("RGB")
-
-    img_raw = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    img_raw = img_raw.astype(np.float32)
-
-    rf = Pytorch_RetinaFace(
-        cfg='mobile0.25',
-        pretrained_path='./weights/mobilenet0.25_Final.pth',
-        confidence_threshold=0.02,
-        nms_threshold=0.4,
-        vis_thres=0.6
-    )
-
-    dets = rf.detect_faces(img_raw)
-    print("Dets: ", dets)
-
-    # Instead of asserting, handle multiple faces gracefully
-    if len(dets) == 0:
-        print("No faces detected!")
-        return False
-
-    # If multiple faces detected, use the one with highest confidence
-    if len(dets) > 1:
-        print(f"Warning: {len(dets)} faces detected, using the one with highest confidence")
-        # Assuming dets is a list of [bbox, landmark, score] and we want to sort by score
-        dets = sorted(dets, key=lambda x: x[2], reverse=True)  # Sort by confidence score
-        # Just keep the highest confidence detection
-        dets = [dets[0]]
-
-    # Pass the scale_factor to center_and_crop_rescale for adjustable crop size
-    try:
-        # Unpack the tuple correctly - the function returns (cropped_imgs, bbox_infos)
-        cropped_imgs, bbox_infos = rf.center_and_crop_rescale(img_raw, dets, shift_factor=0.45, scale_factor=scale_factor)
-
-        # Check if we got any cropped images
-        if not cropped_imgs or len(cropped_imgs) == 0:
-            print("No cropped images returned")
-            return False
-
-        # Use the first cropped face image directly - it's not nested
-        img_to_save = cropped_imgs[0]
-
-        os.makedirs(output_dir, exist_ok=True)
-        cv2.imwrite(os.path.join(output_dir, output_name), img_to_save)
-        print(f"Saved: {output_name}")
-        return True
-
-    except Exception as e:
-        print(f"Error during face cropping: {e}")
-        return False
-
-async def upscale_image(image_path, output_path):
-    """Upscale an image using fal.ai's RealESRGAN model"""
-    fal_client = FalClient()
-
-    # Read and encode the image
-    with open(image_path, "rb") as image_file:
-        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
-    data_uri = f"data:image/jpeg;base64,{encoded_image}"
-
-    try:
-        # Submit the upscaling request
-        handler = await fal_client.submit_async(
-            "fal-ai/real-esrgan",
-            arguments={
-                "image_url": data_uri,
-                "scale": 2,
-                "model": "RealESRGAN_x4plus",
-                "output_format": "png",
-                "face": True
-            },
-        )
-        result = await handler.get()
-
-        # Download and save the upscaled image
-        image_url = result['image_url']
-        async with aiohttp.ClientSession() as session:
-            async with session.get(image_url) as response:
-                if response.status == 200:
-                    with open(output_path, 'wb') as f:
-                        f.write(await response.read())
-                    return True
-                else:
-                    print(f"Failed to download upscaled image: {response.status}")
-                    return False
-
-    except Exception as e:
-        print(f"Error during upscaling: {e}")
-        return False
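
One thing worth flagging in the deleted upscale_image(): it assigned fal_client = FalClient(), a name that was never imported (only the fal_client module is), so the function would have raised NameError before reaching the API. A minimal corrected sketch under the same assumptions as the deleted code, using the fal_client module API directly; the model id, arguments, and result schema are taken from the deleted code, not re-verified against current fal.ai docs:

import base64
import fal_client

async def upscale_image(image_path: str) -> str:
    # encode the local file as a data URI, as the deleted helper did
    with open(image_path, "rb") as f:
        data_uri = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode("utf-8")
    # call the module-level API instead of the undefined FalClient()
    handler = await fal_client.submit_async(
        "fal-ai/real-esrgan",
        arguments={
            "image_url": data_uri,
            "scale": 2,
            "model": "RealESRGAN_x4plus",
            "output_format": "png",
            "face": True,
        },
    )
    result = await handler.get()
    return result["image_url"]  # result schema as assumed by the deleted code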
install.py CHANGED
@@ -4,9 +4,8 @@ import os
 BASE_PATH = "./"
 COMFYUI_PATH = os.path.join(BASE_PATH, "ComfyUI")
 MODEL_PATH = os.path.join(COMFYUI_PATH, "models")
-CACHE_PATH = "/data/huggingface_cache"
 
-os.environ["HF_HOME"] = CACHE_PATH
+CACHE_PATH = os.getenv('HF_HOME')
 os.makedirs(CACHE_PATH, exist_ok=True)
 
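
A caveat on the new CACHE_PATH line: os.getenv('HF_HOME') returns None when the variable is unset, and os.makedirs(None, exist_ok=True) then raises TypeError, so install.py now requires HF_HOME to be exported before it runs. A defensive variant as a sketch, falling back to Hugging Face's documented default cache root rather than failing; the fallback path is not code from this repo:

import os

# fall back to the HF default cache root when HF_HOME is not exported
CACHE_PATH = os.getenv("HF_HOME", os.path.expanduser("~/.cache/huggingface"))
os.makedirs(CACHE_PATH, exist_ok=True)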