Xuanyou committed on
Commit
aa38aa3
·
verified ·
1 Parent(s): 055aca1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -29
app.py CHANGED
@@ -51,6 +51,8 @@ def download_liveportrait():
51
  except Exception as e:
52
  print("Failed to initialize LivePortrait:", e)
53
  raise
 
 
54
  download_liveportrait()
55
 
56
 
@@ -80,6 +82,7 @@ def download_huggingface_resources():
80
  print("General error in downloading resources:", e)
81
  raise
82
 
 
83
  download_huggingface_resources()
84
 
85
 
@@ -87,16 +90,20 @@ def get_project_root():
87
  """Get the root directory of the current project."""
88
  return os.path.abspath(os.path.dirname(__file__))
89
 
 
90
  # Ensure working directory is project root
91
  os.chdir(get_project_root())
92
 
 
93
  # Initialize the necessary models and components
94
  mp_pose = mp.solutions.pose
95
  mp_drawing = mp.solutions.drawing_utils
96
 
 
97
  # Load ControlNet model
98
  controlnet = ControlNetModel.from_pretrained('lllyasviel/sd-controlnet-openpose', torch_dtype=torch.float16)
99
 
 
100
  # Load Stable Diffusion model with ControlNet
101
  pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
102
  'runwayml/stable-diffusion-v1-5',
@@ -104,6 +111,7 @@ pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
104
  torch_dtype=torch.float16
105
  )
106
 
 
107
  # Load Inpaint Controlnet
108
  pipe_inpaint_controlnet = StableDiffusionControlNetInpaintPipeline.from_pretrained(
109
  "runwayml/stable-diffusion-inpainting",
@@ -111,6 +119,7 @@ pipe_inpaint_controlnet = StableDiffusionControlNetInpaintPipeline.from_pretrain
111
  torch_dtype=torch.float16
112
  )
113
 
 
114
  # Move to GPU if available
115
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
116
  pipe_controlnet.to(device)
@@ -163,12 +172,9 @@ def crop_face_to_square(image_rgb, padding_ratio=0.2, height_multiplier=1.5):
163
 
164
  cropped_image = image_rgb[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
165
 
166
- # resized_image = cv2.resize(cropped_image, (768, int(768 * height_multiplier)), interpolation=cv2.INTER_AREA)
167
-
168
  return cropped_image
169
 
170
 
171
-
172
  def spirit_animal_baseline(image_path, num_images = 4):
173
 
174
  image = cv2.imread(image_path)
@@ -212,7 +218,7 @@ def spirit_animal_baseline(image_path, num_images = 4):
212
  pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))
213
 
214
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
215
- api_key = "sk-proj-***REDACTED — leaked OpenAI API key; revoke/rotate immediately (still present in git history)***"
216
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
217
  payload = {
218
  "model": "gpt-4o-mini",
@@ -298,7 +304,7 @@ def spirit_animal_with_background(image_path, num_images = 4):
298
  pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))
299
 
300
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
301
- api_key = "sk-proj-***REDACTED — leaked OpenAI API key; revoke/rotate immediately (still present in git history)***"
302
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
303
  payload = {
304
  "model": "gpt-4o-mini",
@@ -370,7 +376,7 @@ def generate_multiple_animals(image_path, keep_background=True, num_images = 4,
370
  gen_width, gen_height = resize_to_multiple_of_64(gen_width, gen_height)
371
 
372
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
373
- api_key = "sk-proj-***REDACTED — leaked OpenAI API key; revoke/rotate immediately (still present in git history)***"
374
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
375
  payload = {
376
  "model": "gpt-4o-mini",
@@ -566,7 +572,7 @@ def compress_video(input_path, output_path, target_size_mb):
566
  temp_output = "./temp_compressed.mp4"
567
 
568
  cap = cv2.VideoCapture(input_path)
569
- fourcc = cv2.VideoWriter_fourcc(*'mp4v') # 使用 mp4 编码
570
  fps = int(cap.get(cv2.CAP_PROP_FPS))
571
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
572
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
@@ -583,7 +589,7 @@ def compress_video(input_path, output_path, target_size_mb):
583
 
584
  current_size = os.path.getsize(temp_output)
585
  if current_size > target_size_bytes:
586
- bitrate = int(target_size_bytes * 8 / (current_size / target_size_bytes)) # 按比例缩减比特率
587
  os.system(f"ffmpeg -i {temp_output} -b:v {bitrate} -y {output_path}")
588
  os.remove(temp_output)
589
  else:
@@ -592,20 +598,6 @@ def compress_video(input_path, output_path, target_size_mb):
592
 
593
  def process_video(video_file):
594
 
595
- # # 初始化 LivePortrait
596
- # try:
597
- # download_liveportrait()
598
- # except Exception as e:
599
- # print("Failed to initialize LivePortrait:", e)
600
- # return gr.update(value=None, visible=False)
601
-
602
- # # 下载 Hugging Face 资源
603
- # try:
604
- # download_huggingface_resources()
605
- # except Exception as e:
606
- # print("Failed to download Hugging Face resources:", e)
607
- # return gr.update(value=None, visible=False)
608
-
609
  compressed_path = "./uploaded_video_compressed.mp4"
610
  compress_video(video_file, compressed_path, target_size_mb=1)
611
  print(f"Compressed and moved video to: {compressed_path}")
@@ -671,22 +663,21 @@ title_html = """
671
 
672
  description_text = """
673
  ### Project Overview
674
- Welcome to the Spirit Animal Generator! This tool leverages advanced AI technologies to create unique visualizations of spirit animals from both videos and images.
675
  #### Key Features:
676
- 1. **Video Transformation**: Upload a driving video to generate a creative spirit animal animation.
677
- 2. **Image Creation**: Upload an image and customize the spirit animal type and background options.
678
- 3. **AI-Powered Prompting**: OpenAI's GPT generates descriptive prompts for each input.
679
- 4. **High-Quality Outputs**: Generated using Stable Diffusion and ControlNet for stunning visuals.
680
  ---
681
  ### How It Works:
682
  1. **Upload Your Media**:
683
- - Videos: Ensure the file is in MP4 format.
684
  - Images: Use clear, high-resolution photos for better results.
 
685
  2. **Customize Options**:
686
  - For images, select the type of animal and background settings.
687
  3. **View Your Results**:
688
- - Videos will be transformed into animations.
689
  - Images will produce customized visual art along with a generated prompt.
 
690
  Discover your spirit animal and let your imagination run wild!
691
  ---
692
  """
 
51
  except Exception as e:
52
  print("Failed to initialize LivePortrait:", e)
53
  raise
54
+
55
+
56
  download_liveportrait()
57
 
58
 
 
82
  print("General error in downloading resources:", e)
83
  raise
84
 
85
+
86
  download_huggingface_resources()
87
 
88
 
 
90
  """Get the root directory of the current project."""
91
  return os.path.abspath(os.path.dirname(__file__))
92
 
93
+
94
  # Ensure working directory is project root
95
  os.chdir(get_project_root())
96
 
97
+
98
  # Initialize the necessary models and components
99
  mp_pose = mp.solutions.pose
100
  mp_drawing = mp.solutions.drawing_utils
101
 
102
+
103
  # Load ControlNet model
104
  controlnet = ControlNetModel.from_pretrained('lllyasviel/sd-controlnet-openpose', torch_dtype=torch.float16)
105
 
106
+
107
  # Load Stable Diffusion model with ControlNet
108
  pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
109
  'runwayml/stable-diffusion-v1-5',
 
111
  torch_dtype=torch.float16
112
  )
113
 
114
+
115
  # Load Inpaint Controlnet
116
  pipe_inpaint_controlnet = StableDiffusionControlNetInpaintPipeline.from_pretrained(
117
  "runwayml/stable-diffusion-inpainting",
 
119
  torch_dtype=torch.float16
120
  )
121
 
122
+
123
  # Move to GPU if available
124
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
125
  pipe_controlnet.to(device)
 
172
 
173
  cropped_image = image_rgb[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
174
 
 
 
175
  return cropped_image
176
 
177
 
 
178
  def spirit_animal_baseline(image_path, num_images = 4):
179
 
180
  image = cv2.imread(image_path)
 
218
  pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))
219
 
220
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
221
+ api_key = os.getenv("GPT_KEY")
222
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
223
  payload = {
224
  "model": "gpt-4o-mini",
 
304
  pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))
305
 
306
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
307
+ api_key = os.getenv("GPT_KEY")
308
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
309
  payload = {
310
  "model": "gpt-4o-mini",
 
376
  gen_width, gen_height = resize_to_multiple_of_64(gen_width, gen_height)
377
 
378
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
379
+ api_key = os.getenv("GPT_KEY")
380
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
381
  payload = {
382
  "model": "gpt-4o-mini",
 
572
  temp_output = "./temp_compressed.mp4"
573
 
574
  cap = cv2.VideoCapture(input_path)
575
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
576
  fps = int(cap.get(cv2.CAP_PROP_FPS))
577
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
578
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
589
 
590
  current_size = os.path.getsize(temp_output)
591
  if current_size > target_size_bytes:
592
+ bitrate = int(target_size_bytes * 8 / (current_size / target_size_bytes))
593
  os.system(f"ffmpeg -i {temp_output} -b:v {bitrate} -y {output_path}")
594
  os.remove(temp_output)
595
  else:
 
598
 
599
  def process_video(video_file):
600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
  compressed_path = "./uploaded_video_compressed.mp4"
602
  compress_video(video_file, compressed_path, target_size_mb=1)
603
  print(f"Compressed and moved video to: {compressed_path}")
 
663
 
664
  description_text = """
665
  ### Project Overview
666
+ Welcome to the Spirit Animal Generator! This tool leverages Stable Diffusion models to create unique visualizations of spirit animals from videos and images.
667
  #### Key Features:
668
+ 1. **Prompting**: [GPT Model](https://arxiv.org/abs/2305.10435) generates descriptive prompts for each media input.
669
+ 2. **Image Creation**: [ControlNet Model](https://arxiv.org/abs/2302.05543) generates animal images with pose control.
670
+ 3. **Video Transformation**: [LivePortrait Model](https://arxiv.org/abs/2407.03168) generates animal animations with the same facial expressions.
 
671
  ---
672
  ### How It Works:
673
  1. **Upload Your Media**:
 
674
  - Images: Use clear, high-resolution photos for better results.
675
+ - Videos: Ensure the file is in MP4 format.
676
  2. **Customize Options**:
677
  - For images, select the type of animal and background settings.
678
  3. **View Your Results**:
 
679
  - Images will produce customized visual art along with a generated prompt.
680
+ - Videos will be transformed into animal animations.
681
  Discover your spirit animal and let your imagination run wild!
682
  ---
683
  """