Xuanyou committed on
Commit
aa38aa3
·
verified ·
1 Parent(s): 055aca1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -29
app.py CHANGED
@@ -51,6 +51,8 @@ def download_liveportrait():
51
  except Exception as e:
52
  print("Failed to initialize LivePortrait:", e)
53
  raise
 
 
54
  download_liveportrait()
55
 
56
 
@@ -80,6 +82,7 @@ def download_huggingface_resources():
80
  print("General error in downloading resources:", e)
81
  raise
82
 
 
83
  download_huggingface_resources()
84
 
85
 
@@ -87,16 +90,20 @@ def get_project_root():
87
  """Get the root directory of the current project."""
88
  return os.path.abspath(os.path.dirname(__file__))
89
 
 
90
  # Ensure working directory is project root
91
  os.chdir(get_project_root())
92
 
 
93
  # Initialize the necessary models and components
94
  mp_pose = mp.solutions.pose
95
  mp_drawing = mp.solutions.drawing_utils
96
 
 
97
  # Load ControlNet model
98
  controlnet = ControlNetModel.from_pretrained('lllyasviel/sd-controlnet-openpose', torch_dtype=torch.float16)
99
 
 
100
  # Load Stable Diffusion model with ControlNet
101
  pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
102
  'runwayml/stable-diffusion-v1-5',
@@ -104,6 +111,7 @@ pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
104
  torch_dtype=torch.float16
105
  )
106
 
 
107
  # Load Inpaint Controlnet
108
  pipe_inpaint_controlnet = StableDiffusionControlNetInpaintPipeline.from_pretrained(
109
  "runwayml/stable-diffusion-inpainting",
@@ -111,6 +119,7 @@ pipe_inpaint_controlnet = StableDiffusionControlNetInpaintPipeline.from_pretrain
111
  torch_dtype=torch.float16
112
  )
113
 
 
114
  # Move to GPU if available
115
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
116
  pipe_controlnet.to(device)
@@ -163,12 +172,9 @@ def crop_face_to_square(image_rgb, padding_ratio=0.2, height_multiplier=1.5):
163
 
164
  cropped_image = image_rgb[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
165
 
166
- # resized_image = cv2.resize(cropped_image, (768, int(768 * height_multiplier)), interpolation=cv2.INTER_AREA)
167
-
168
  return cropped_image
169
 
170
 
171
-
172
  def spirit_animal_baseline(image_path, num_images = 4):
173
 
174
  image = cv2.imread(image_path)
@@ -212,7 +218,7 @@ def spirit_animal_baseline(image_path, num_images = 4):
212
  pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))
213
 
214
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
215
- api_key = "sk-proj-***REDACTED — leaked OpenAI API key; revoke/rotate immediately (still present in git history)***"
216
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
217
  payload = {
218
  "model": "gpt-4o-mini",
@@ -298,7 +304,7 @@ def spirit_animal_with_background(image_path, num_images = 4):
298
  pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))
299
 
300
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
301
- api_key = "sk-proj-***REDACTED — leaked OpenAI API key; revoke/rotate immediately (still present in git history)***"
302
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
303
  payload = {
304
  "model": "gpt-4o-mini",
@@ -370,7 +376,7 @@ def generate_multiple_animals(image_path, keep_background=True, num_images = 4,
370
  gen_width, gen_height = resize_to_multiple_of_64(gen_width, gen_height)
371
 
372
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
373
- api_key = "sk-proj-***REDACTED — leaked OpenAI API key; revoke/rotate immediately (still present in git history)***"
374
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
375
  payload = {
376
  "model": "gpt-4o-mini",
@@ -566,7 +572,7 @@ def compress_video(input_path, output_path, target_size_mb):
566
  temp_output = "./temp_compressed.mp4"
567
 
568
  cap = cv2.VideoCapture(input_path)
569
- fourcc = cv2.VideoWriter_fourcc(*'mp4v') # 使用 mp4 编码
570
  fps = int(cap.get(cv2.CAP_PROP_FPS))
571
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
572
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
@@ -583,7 +589,7 @@ def compress_video(input_path, output_path, target_size_mb):
583
 
584
  current_size = os.path.getsize(temp_output)
585
  if current_size > target_size_bytes:
586
- bitrate = int(target_size_bytes * 8 / (current_size / target_size_bytes)) # 按比例缩减比特率
587
  os.system(f"ffmpeg -i {temp_output} -b:v {bitrate} -y {output_path}")
588
  os.remove(temp_output)
589
  else:
@@ -592,20 +598,6 @@ def compress_video(input_path, output_path, target_size_mb):
592
 
593
  def process_video(video_file):
594
 
595
- # # 初始化 LivePortrait
596
- # try:
597
- # download_liveportrait()
598
- # except Exception as e:
599
- # print("Failed to initialize LivePortrait:", e)
600
- # return gr.update(value=None, visible=False)
601
-
602
- # # 下载 Hugging Face 资源
603
- # try:
604
- # download_huggingface_resources()
605
- # except Exception as e:
606
- # print("Failed to download Hugging Face resources:", e)
607
- # return gr.update(value=None, visible=False)
608
-
609
  compressed_path = "./uploaded_video_compressed.mp4"
610
  compress_video(video_file, compressed_path, target_size_mb=1)
611
  print(f"Compressed and moved video to: {compressed_path}")
@@ -671,22 +663,21 @@ title_html = """
671
 
672
  description_text = """
673
  ### Project Overview
674
- Welcome to the Spirit Animal Generator! This tool leverages advanced AI technologies to create unique visualizations of spirit animals from both videos and images.
675
  #### Key Features:
676
- 1. **Video Transformation**: Upload a driving video to generate a creative spirit animal animation.
677
- 2. **Image Creation**: Upload an image and customize the spirit animal type and background options.
678
- 3. **AI-Powered Prompting**: OpenAI's GPT generates descriptive prompts for each input.
679
- 4. **High-Quality Outputs**: Generated using Stable Diffusion and ControlNet for stunning visuals.
680
  ---
681
  ### How It Works:
682
  1. **Upload Your Media**:
683
- - Videos: Ensure the file is in MP4 format.
684
  - Images: Use clear, high-resolution photos for better results.
 
685
  2. **Customize Options**:
686
  - For images, select the type of animal and background settings.
687
  3. **View Your Results**:
688
- - Videos will be transformed into animations.
689
  - Images will produce customized visual art along with a generated prompt.
 
690
  Discover your spirit animal and let your imagination run wild!
691
  ---
692
  """
 
51
  except Exception as e:
52
  print("Failed to initialize LivePortrait:", e)
53
  raise
54
+
55
+
56
  download_liveportrait()
57
 
58
 
 
82
  print("General error in downloading resources:", e)
83
  raise
84
 
85
+
86
  download_huggingface_resources()
87
 
88
 
 
90
  """Get the root directory of the current project."""
91
  return os.path.abspath(os.path.dirname(__file__))
92
 
93
+
94
  # Ensure working directory is project root
95
  os.chdir(get_project_root())
96
 
97
+
98
  # Initialize the necessary models and components
99
  mp_pose = mp.solutions.pose
100
  mp_drawing = mp.solutions.drawing_utils
101
 
102
+
103
  # Load ControlNet model
104
  controlnet = ControlNetModel.from_pretrained('lllyasviel/sd-controlnet-openpose', torch_dtype=torch.float16)
105
 
106
+
107
  # Load Stable Diffusion model with ControlNet
108
  pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
109
  'runwayml/stable-diffusion-v1-5',
 
111
  torch_dtype=torch.float16
112
  )
113
 
114
+
115
  # Load Inpaint Controlnet
116
  pipe_inpaint_controlnet = StableDiffusionControlNetInpaintPipeline.from_pretrained(
117
  "runwayml/stable-diffusion-inpainting",
 
119
  torch_dtype=torch.float16
120
  )
121
 
122
+
123
  # Move to GPU if available
124
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
125
  pipe_controlnet.to(device)
 
172
 
173
  cropped_image = image_rgb[top_left_y:bottom_right_y, top_left_x:bottom_right_x]
174
 
 
 
175
  return cropped_image
176
 
177
 
 
178
  def spirit_animal_baseline(image_path, num_images = 4):
179
 
180
  image = cv2.imread(image_path)
 
218
  pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))
219
 
220
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
221
+ api_key = os.getenv("GPT_KEY")
222
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
223
  payload = {
224
  "model": "gpt-4o-mini",
 
304
  pose_pil = Image.fromarray(cv2.resize(pose_image, (gen_width, gen_height), interpolation=cv2.INTER_LANCZOS4))
305
 
306
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
307
+ api_key = os.getenv("GPT_KEY")
308
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
309
  payload = {
310
  "model": "gpt-4o-mini",
 
376
  gen_width, gen_height = resize_to_multiple_of_64(gen_width, gen_height)
377
 
378
  base64_image = base64.b64encode(cv2.imencode('.jpg', image_rgb)[1]).decode()
379
+ api_key = os.getenv("GPT_KEY")
380
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
381
  payload = {
382
  "model": "gpt-4o-mini",
 
572
  temp_output = "./temp_compressed.mp4"
573
 
574
  cap = cv2.VideoCapture(input_path)
575
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
576
  fps = int(cap.get(cv2.CAP_PROP_FPS))
577
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
578
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
589
 
590
  current_size = os.path.getsize(temp_output)
591
  if current_size > target_size_bytes:
592
+ bitrate = int(target_size_bytes * 8 / (current_size / target_size_bytes))
593
  os.system(f"ffmpeg -i {temp_output} -b:v {bitrate} -y {output_path}")
594
  os.remove(temp_output)
595
  else:
 
598
 
599
  def process_video(video_file):
600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
  compressed_path = "./uploaded_video_compressed.mp4"
602
  compress_video(video_file, compressed_path, target_size_mb=1)
603
  print(f"Compressed and moved video to: {compressed_path}")
 
663
 
664
  description_text = """
665
  ### Project Overview
666
+ Welcome to the Spirit Animal Generator! This tool leverages Stable Diffusion models to create unique visualizations of spirit animals from videos and images.
667
  #### Key Features:
668
+ 1. **Prompting**: [GPT Model](https://arxiv.org/abs/2305.10435) generates descriptive prompts for each media input.
669
+ 2. **Image Creation**: [ControlNet Model](https://arxiv.org/abs/2302.05543) generates animal images with pose control.
670
+ 3. **Video Transformation**: [LivePortrait Model](https://arxiv.org/abs/2407.03168) generates animal animations with the same facial expressions.
 
671
  ---
672
  ### How It Works:
673
  1. **Upload Your Media**:
 
674
  - Images: Use clear, high-resolution photos for better results.
675
+ - Videos: Ensure the file is in MP4 format.
676
  2. **Customize Options**:
677
  - For images, select the type of animal and background settings.
678
  3. **View Your Results**:
 
679
  - Images will produce customized visual art along with a generated prompt.
680
+ - Videos will be transformed into animal animations.
681
  Discover your spirit animal and let your imagination run wild!
682
  ---
683
  """