Spaces: Running on Zero
yeq6x committed • c9cc441
Parent(s): init

Files changed:
- .gitignore +5 -0
- Dockerfile.backend +35 -0
- anime.py +152 -0
- app.py +187 -0
- data.py +97 -0
- generate_prompt.py +154 -0
- lineart_util.py +109 -0
- model.py +186 -0
- process_utils.py +345 -0
- requirements.txt +19 -0
- templates/index.html +60 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
models/
__pycache__/
venv/
output/
hf_gradio/
Dockerfile.backend
ADDED
@@ -0,0 +1,35 @@
# Use a CUDA 12.1 Ubuntu base image
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04

RUN ln -sf /usr/share/zoneinfo/Asia/Tokyo /etc/localtime

# Install required packages
RUN apt-get update && apt-get install -y \
    software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y \
    python3.10 \
    python3.10-dev \
    python3.10-distutils \
    wget \
    && rm -rf /var/lib/apt/lists/*
# Install pip
RUN wget https://bootstrap.pypa.io/get-pip.py \
    && python3.10 get-pip.py \
    && rm get-pip.py
# Link the default python and pip commands to python3.10 and pip3.10
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \
    && update-alternatives --install /usr/bin/pip pip /usr/local/bin/pip3.10 1

WORKDIR /app

# Install dependencies
COPY requirements.txt /app/
RUN apt -y update && apt -y upgrade
RUN apt -y install libopencv-dev
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-dependencies transformers

EXPOSE 5000

CMD ["python", "app.py"]
anime.py
ADDED
@@ -0,0 +1,152 @@
"""Test script for anime-to-sketch translation
Example:
    python3 test.py --dataroot /your_path/dir --load_size 512
    python3 test.py --dataroot /your_path/img.jpg --load_size 512
"""

import os
import torch
from torchvision import transforms
from data import get_image_list, get_transform
from model import create_model
from data import read_img_path, tensor_to_img, save_image
import argparse
from tqdm.auto import tqdm
from kornia.enhance import equalize_clahe
from PIL import Image
import numpy as np


# Takes an image as a numpy array, generates a line drawing, and returns it as a numpy array
def generate_sketch(image, clahe_clip=-1, load_size=512):
    """
    Generate sketch image from input image
    Args:
        image (np.ndarray): input image
        clahe_clip (float): clip threshold for CLAHE
        load_size (int): image size to load
    Returns:
        np.ndarray: output image
    """
    # create model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_opt = "default"
    model = create_model(model_opt).to(device)
    model.eval()

    aus_resize = None
    if load_size > 0:
        aus_resize = (image.shape[0], image.shape[1])
    transform = get_transform(load_size=load_size)
    image = torch.from_numpy(image).permute(2, 0, 1).float()
    # [0,255] to [-1,1]
    image = transform(image)
    if image.max() > 1:
        image = (image - image.min()) / (image.max() - image.min()) * 2 - 1

    img, aus_resize = image.unsqueeze(0), aus_resize
    if clahe_clip > 0:
        img = (img + 1) / 2  # [-1,1] to [0,1]
        img = equalize_clahe(img, clip_limit=clahe_clip)
        img = (img - .5) / .5  # [0,1] to [-1,1]

    aus_tensor = model(img.to(device))

    # resize to original size
    if aus_resize is not None:
        aus_tensor = torch.nn.functional.interpolate(aus_tensor, aus_resize, mode='bilinear', align_corners=False)

    aus_img = tensor_to_img(aus_tensor)
    return aus_img


if __name__ == '__main__':
    os.chdir(os.path.dirname("Anime2Sketch/"))
    parser = argparse.ArgumentParser(description='Anime-to-sketch test options.')
    parser.add_argument('--dataroot', '-i', default='test_samples/', type=str)
    parser.add_argument('--load_size', '-s', default=512, type=int)
    parser.add_argument('--output_dir', '-o', default='results/', type=str)
    parser.add_argument('--gpu_ids', '-g', default=[], help="gpu ids: e.g. 0 0,1,2 0,2.")
    parser.add_argument('--model', default="default", type=str, help="variant of model to use. you can choose from ['default','improved']")
    parser.add_argument('--clahe_clip', default=-1, type=float, help="clip threshold for CLAHE set to -1 to disable")
    opt = parser.parse_args()

    # # Generate line art with generate_sketch
    # for test_path in tqdm(get_image_list(opt.dataroot)):
    #     basename = os.path.basename(test_path)
    #     aus_path = os.path.join(opt.output_dir, basename)
    #     # Read the image as a numpy array
    #     img = Image.open(test_path)
    #     img = np.array(img)
    #     aus_img = generate_sketch(img, opt.clahe_clip)
    #     # Save the image
    #     save_image(aus_img, aus_path, (512, 512))


    # create model
    gpu_list = ','.join(str(x) for x in opt.gpu_ids)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = create_model(opt.model).to(device)  # create a model given opt.model and other options
    model.eval()

    for test_path in tqdm(get_image_list(opt.dataroot)):
        basename = os.path.basename(test_path)
        aus_path = os.path.join(opt.output_dir, basename)

        img = Image.open(test_path).convert('RGB')
        img = np.array(img)

        load_size = 512
        aus_resize = None
        if load_size > 0:
            aus_resize = (img.shape[1], img.shape[0])
        transform = get_transform(load_size=load_size)
        img = torch.from_numpy(img).permute(2, 0, 1).float()
        # [0,255] to [-1,1]
        image = transform(img)
        if image.max() > 1:
            image = (image - image.min()) / (image.max() - image.min()) * 2 - 1
        print(image.min(), image.max())

        img, aus_resize = image.unsqueeze(0), aus_resize
        if opt.clahe_clip > 0:
            img = (img + 1) / 2  # [-1,1] to [0,1]
            img = equalize_clahe(img, clip_limit=opt.clahe_clip)
            img = (img - .5) / .5  # [0,1] to [-1,1]

        aus_tensor = model(img.to(device))
        aus_img = tensor_to_img(aus_tensor)
        save_image(aus_img, aus_path, aus_resize)
    """
    # create model
    gpu_list = ','.join(str(x) for x in opt.gpu_ids)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list
    device = torch.device('cuda' if len(opt.gpu_ids)>0 else 'cpu')
    model = create_model(opt.model).to(device)      # create a model given opt.model and other options
    model.eval()
    # get input data
    if os.path.isdir(opt.dataroot):
        test_list = get_image_list(opt.dataroot)
    elif os.path.isfile(opt.dataroot):
        test_list = [opt.dataroot]
    else:
        raise Exception("{} is not a valid directory or image file.".format(opt.dataroot))
    # save outputs
    save_dir = opt.output_dir
    os.makedirs(save_dir, exist_ok=True)

    for test_path in tqdm(test_list):
        basename = os.path.basename(test_path)
        aus_path = os.path.join(save_dir, basename)
        img, aus_resize = read_img_path(test_path, opt.load_size)

        if opt.clahe_clip > 0:
            img = (img + 1) / 2  # [-1,1] to [0,1]
            img = equalize_clahe(img, clip_limit=opt.clahe_clip)
            img = (img - .5) / .5  # [0,1] to [-1,1]

        aus_tensor = model(img.to(device))
        print(aus_tensor.shape)
        aus_img = tensor_to_img(aus_tensor)
        save_image(aus_img, aus_path, aus_resize)
    """
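A minimal usage sketch for generate_sketch above. It assumes the full requirements are installed and that the Space's model-repo environment variables are configured (create_model fetches netG.pth through download_file); "input.png" and "sketch.png" are placeholder paths, not files from this commit.

# Hypothetical usage of anime.generate_sketch; file names are placeholders.
import numpy as np
from PIL import Image
from anime import generate_sketch
from data import save_image

img = np.array(Image.open("input.png").convert("RGB"))
sketch = generate_sketch(img, clahe_clip=1.0, load_size=512)    # returns a numpy array
save_image(sketch, "sketch.png", (img.shape[1], img.shape[0]))  # resize back to the input size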
app.py
ADDED
@@ -0,0 +1,187 @@
from flask import Flask, request, render_template, send_file, jsonify, send_from_directory
from flask_socketio import SocketIO, emit
from flask_cors import CORS
import io
import os
from PIL import Image
import torch
import gc
from peft import PeftModel

import queue
import threading
import uuid
import concurrent.futures
from process_utils import *

app = Flask(__name__)
# app.secret_key = 'super_secret_key'
CORS(app)
socketio = SocketIO(app, cors_allowed_origins="*")

# Create the task queue
task_queue = queue.Queue()
active_tasks = {}
task_futures = {}

# Create the ThreadPoolExecutor
executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)

class Task:
    def __init__(self, task_id, mode, weight1, weight2, file_data):
        self.task_id = task_id
        self.mode = mode
        self.weight1 = weight1
        self.weight2 = weight2
        self.file_data = file_data
        self.cancel_flag = False

def update_queue_status(message=None):
    socketio.emit('queue_update', {'active_tasks': len(active_tasks), 'message': message})

def process_task(task):
    try:
        # Convert the file data to a PIL Image
        image = Image.open(io.BytesIO(task.file_data))
        image = ensure_rgb(image)

        # Check for cancellation
        if task.cancel_flag:
            return

        # Call the image-processing logic
        sotai_image, sketch_image = process_image_as_base64(image, task.mode, task.weight1, task.weight2)

        # Check for cancellation
        if task.cancel_flag:
            return

        socketio.emit('task_complete', {
            'task_id': task.task_id,
            'sotai_image': sotai_image,
            'sketch_image': sketch_image
        })
    except Exception as e:
        if not task.cancel_flag:
            socketio.emit('task_error', {'task_id': task.task_id, 'error': str(e)})
    finally:
        if task.task_id in active_tasks:
            del active_tasks[task.task_id]
        if task.task_id in task_futures:
            del task_futures[task.task_id]
        update_queue_status('Task completed or cancelled')

def worker():
    while True:
        try:
            task = task_queue.get()
            if task.task_id in active_tasks:
                future = executor.submit(process_task, task)
                task_futures[task.task_id] = future
                update_queue_status(f'Task started: {task.task_id}')
        except Exception as e:
            print(f"Worker error: {str(e)}")
        finally:
            # Ensure the task is always removed from the queue
            task_queue.task_done()

# Start the worker thread
threading.Thread(target=worker, daemon=True).start()

@app.route('/submit_task', methods=['POST'])
def submit_task():
    task_id = str(uuid.uuid4())
    file = request.files['file']
    mode = request.form.get('mode', 'refine')
    weight1 = float(request.form.get('weight1', 0.4))
    weight2 = float(request.form.get('weight2', 0.3))

    # Keep the uploaded file as raw bytes
    file_data = file.read()

    task = Task(task_id, mode, weight1, weight2, file_data)
    task_queue.put(task)
    active_tasks[task_id] = task

    update_queue_status(f'Task submitted: {task_id}')

    queue_size = task_queue.qsize()
    return jsonify({'task_id': task_id, 'queue_size': queue_size})

@app.route('/cancel_task/<task_id>', methods=['POST'])
def cancel_task(task_id):
    if task_id in active_tasks:
        task = active_tasks[task_id]
        task.cancel_flag = True
        if task_id in task_futures:
            task_futures[task_id].cancel()
            del task_futures[task_id]
        del active_tasks[task_id]
        update_queue_status('Task cancelled')
        return jsonify({'message': 'Task cancellation requested'})
    else:
        return jsonify({'message': 'Task not found or already completed'}), 404

def get_active_task_order(task_id):
    return list(active_tasks.keys()).index(task_id) if task_id in active_tasks else None

# Handler for get_task_order requests
@app.route('/get_task_order/<task_id>', methods=['GET'])
def handle_get_task_order(task_id):
    task_order = get_active_task_order(task_id)
    return jsonify({'task_order': task_order})

@socketio.on('connect')
def handle_connect():
    emit('queue_update', {'active_tasks': len(active_tasks), 'active_task_order': None})

# Flask routes
@app.route('/', methods=['GET', 'POST'])
def process_refined():
    if request.method == 'POST':
        file = request.files['file']
        weight1 = float(request.form.get('weight1', 0.4))
        weight2 = float(request.form.get('weight2', 0.3))

        image = ensure_rgb(Image.open(file.stream))
        sotai_image, sketch_image = process_image_as_base64(image, "refine", weight1, weight2)

        return jsonify({
            'sotai_image': sotai_image,
            'sketch_image': sketch_image
        })

@app.route('/process_original', methods=['GET', 'POST'])
def process_original():
    if request.method == 'POST':
        file = request.files['file']

        image = ensure_rgb(Image.open(file.stream))
        sotai_image, sketch_image = process_image_as_base64(image, "original")

        return jsonify({
            'sotai_image': sotai_image,
            'sketch_image': sketch_image
        })

@app.route('/process_sketch', methods=['GET', 'POST'])
def process_sketch():
    if request.method == 'POST':
        file = request.files['file']

        image = ensure_rgb(Image.open(file.stream))
        sotai_image, sketch_image = process_image_as_base64(image, "sketch")

        return jsonify({
            'sotai_image': sotai_image,
            'sketch_image': sketch_image
        })

# Error handler
@app.errorhandler(500)
def server_error(e):
    return jsonify(error=str(e)), 500

if __name__ == '__main__':
    initialize(local_model=True)
    socketio.run(app, debug=True, host='0.0.0.0', port=5000)
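A minimal client sketch for the /submit_task endpoint above, using the Requests package from requirements.txt. The host/port and "test.png" are assumptions, and the generated images are delivered later via the Socket.IO 'task_complete' event rather than in this HTTP response.

# Hypothetical client call; "http://localhost:5000" and "test.png" are placeholders.
import requests

with open("test.png", "rb") as f:
    resp = requests.post(
        "http://localhost:5000/submit_task",
        files={"file": f},
        data={"mode": "refine", "weight1": "0.4", "weight2": "0.3"},
    )
print(resp.json())  # e.g. {'task_id': '...', 'queue_size': 1}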
data.py
ADDED
@@ -0,0 +1,97 @@
import os
from PIL import Image
import torchvision.transforms as transforms
try:
    from transforms import InterpolationMode
    bic = InterpolationMode.BICUBIC
except ImportError:
    bic = Image.BICUBIC

import numpy as np
import torch

IMG_EXTENSIONS = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP']

def is_image_file(filename):
    """if a given filename is a valid image
    Parameters:
        filename (str) -- image filename
    """
    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)

def get_image_list(path):
    """read the paths of valid images from the given directory path
    Parameters:
        path (str) -- input directory path
    """
    assert os.path.isdir(path), '{:s} is not a valid directory'.format(path)
    images = []
    for dirpath, _, fnames in sorted(os.walk(path)):
        for fname in sorted(fnames):
            if is_image_file(fname):
                img_path = os.path.join(dirpath, fname)
                images.append(img_path)
    assert images, '{:s} has no valid image file'.format(path)
    return images

def get_transform(load_size=0, grayscale=False, method=bic, convert=True):
    transform_list = []
    if grayscale:
        transform_list.append(transforms.Grayscale(1))
    if load_size > 0:
        osize = [load_size, load_size]
        transform_list.append(transforms.Resize(osize, method))
    if convert:
        # transform_list += [transforms.ToTensor()]
        if grayscale:
            transform_list += [transforms.Normalize((0.5,), (0.5,))]
        else:
            transform_list += [transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    return transforms.Compose(transform_list)

def read_img_path(path, load_size):
    """read tensors from a given image path
    Parameters:
        path (str)     -- input image path
        load_size(int) -- the input size. If <= 0, don't resize
    """
    img = Image.open(path).convert('RGB')
    aus_resize = None
    if load_size > 0:
        aus_resize = img.size
    transform = get_transform(load_size=load_size)
    image = transform(img)
    return image.unsqueeze(0), aus_resize

def tensor_to_img(input_image, imtype=np.uint8):
    """Converts a Tensor array into a numpy image array.
    Parameters:
        input_image (tensor) -- the input image tensor array
        imtype (type)        -- the desired type of the converted numpy array
    """

    if not isinstance(input_image, np.ndarray):
        if isinstance(input_image, torch.Tensor):  # get the data from a variable
            image_tensor = input_image.data
        else:
            return input_image
        image_numpy = image_tensor[0].cpu().float().numpy()  # convert it into a numpy array
        if image_numpy.shape[0] == 1:  # grayscale to RGB
            image_numpy = np.tile(image_numpy, (3, 1, 1))
        image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0  # post-processing: transpose and scaling
    else:  # if it is a numpy array, do nothing
        image_numpy = input_image
    return image_numpy.astype(imtype)

def save_image(image_numpy, image_path, output_resize=None):
    """Save a numpy image to the disk
    Parameters:
        image_numpy (numpy array)    -- input numpy array
        image_path (str)             -- the path of the image
        output_resize(None or tuple) -- the output size. If None, don't resize
    """

    image_pil = Image.fromarray(image_numpy)
    if output_resize:
        image_pil = image_pil.resize(output_resize, bic)
    image_pil.save(image_path)
generate_prompt.py
ADDED
@@ -0,0 +1,154 @@
import argparse
import csv
import os
import json

from PIL import Image
import cv2
import numpy as np
from tensorflow.keras.layers import TFSMLayer
from huggingface_hub import hf_hub_download
from pathlib import Path

# from wd14 tagger
IMAGE_SIZE = 448

# wd-v1-4-swinv2-tagger-v2 / wd-v1-4-vit-tagger / wd-v1-4-vit-tagger-v2 / wd-v1-4-convnext-tagger / wd-v1-4-convnext-tagger-v2
DEFAULT_WD14_TAGGER_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
FILES = ["keras_metadata.pb", "saved_model.pb", "selected_tags.csv"]
SUB_DIR = "variables"
SUB_DIR_FILES = ["variables.data-00000-of-00001", "variables.index"]
CSV_FILE = FILES[-1]

def preprocess_image(image):
    image = np.array(image)
    image = image[:, :, ::-1]  # RGB->BGR

    # pad to square
    size = max(image.shape[0:2])
    pad_x = size - image.shape[1]
    pad_y = size - image.shape[0]
    pad_l = pad_x // 2
    pad_t = pad_y // 2
    image = np.pad(image, ((pad_t, pad_y - pad_t), (pad_l, pad_x - pad_l), (0, 0)), mode="constant", constant_values=255)

    interp = cv2.INTER_AREA if size > IMAGE_SIZE else cv2.INTER_LANCZOS4
    image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE), interpolation=interp)

    image = image.astype(np.float32)
    return image


def load_wd14_tagger_model():
    model_dir = "wd14_tagger_model"
    repo_id = DEFAULT_WD14_TAGGER_REPO

    if not os.path.exists(model_dir):
        print(f"downloading wd14 tagger model from hf_hub. id: {repo_id}")
        for file in FILES:
            hf_hub_download(repo_id, file, cache_dir=model_dir, force_download=True, force_filename=file)
        for file in SUB_DIR_FILES:
            hf_hub_download(
                repo_id,
                file,
                subfolder=SUB_DIR,
                cache_dir=os.path.join(model_dir, SUB_DIR),
                force_download=True,
                force_filename=file,
            )
    else:
        print("using existing wd14 tagger model")

    # Load the model
    model = TFSMLayer(model_dir, call_endpoint='serving_default')
    return model


def generate_tags(images, model_dir, model):
    with open(os.path.join(model_dir, CSV_FILE), "r", encoding="utf-8") as f:
        reader = csv.reader(f)
        l = [row for row in reader]
        header = l[0]  # tag_id,name,category,count
        rows = l[1:]
    assert header[0] == "tag_id" and header[1] == "name" and header[2] == "category", f"unexpected csv format: {header}"

    general_tags = [row[1] for row in rows[1:] if row[2] == "0"]
    character_tags = [row[1] for row in rows[1:] if row[2] == "4"]

    tag_freq = {}
    undesired_tags = ['one-piece_swimsuit',
                      'swimsuit',
                      'leotard',
                      'saitama_(one-punch_man)',
                      '1boy',
                      ]

    probs = model(images, training=False)
    probs = probs['predictions_sigmoid'].numpy()

    tag_text_list = []
    for prob in probs:
        combined_tags = []
        general_tag_text = ""
        character_tag_text = ""
        thresh = 0.35
        for i, p in enumerate(prob[4:]):
            if i < len(general_tags) and p >= thresh:
                tag_name = general_tags[i]
                if tag_name not in undesired_tags:
                    tag_freq[tag_name] = tag_freq.get(tag_name, 0) + 1
                    general_tag_text += ", " + tag_name
                    combined_tags.append(tag_name)
            elif i >= len(general_tags) and p >= thresh:
                tag_name = character_tags[i - len(general_tags)]
                if tag_name not in undesired_tags:
                    tag_freq[tag_name] = tag_freq.get(tag_name, 0) + 1
                    character_tag_text += ", " + tag_name
                    combined_tags.append(tag_name)

        if len(general_tag_text) > 0:
            general_tag_text = general_tag_text[2:]
        if len(character_tag_text) > 0:
            character_tag_text = character_tag_text[2:]

        tag_text = ", ".join(combined_tags)
        tag_text_list.append(tag_text)
    return tag_text_list


def generate_prompt_json(target_folder, prompt_file, model_dir, model):
    image_files = [f for f in os.listdir(target_folder) if os.path.isfile(os.path.join(target_folder, f))]
    image_count = len(image_files)

    prompt_list = []

    for i, filename in enumerate(image_files, 1):
        source_path = "source/" + filename
        target_path = os.path.join(target_folder, filename)  # Use absolute path
        target_path2 = "target/" + filename

        prompt = generate_tags(target_path, model_dir, model)

        for j in range(4):
            prompt_data = {
                "source": f"{source_path.split('.')[0]}_{j}.jpg",
                "target": f"{target_path2.split('.')[0]}_{j}.jpg",
                "prompt": prompt
            }

            prompt_list.append(prompt_data)

        print(f"Processed Images: {i}/{image_count}", end="\r", flush=True)

    with open(prompt_file, "w") as file:
        for prompt_data in prompt_list:
            json.dump(prompt_data, file)
            file.write("\n")

    print(f"Processing completed. Total Images: {image_count}")


if __name__ == '__main__':
    model_dir = "wd14_tagger_model"
    model = load_wd14_tagger_model()
    prompt = generate_tags(target_path, model_dir, model)
lineart_util.py
ADDED
@@ -0,0 +1,109 @@
import cv2
import numpy as np
from PIL import Image
from anime import generate_sketch

def pad64(x):
    return int(np.ceil(float(x) / 64.0) * 64 - x)

def HWC3(x):
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    H, W, C = x.shape
    assert C == 1 or C == 3 or C == 4
    if C == 3:
        return x
    if C == 1:
        return np.concatenate([x, x, x], axis=2)
    if C == 4:
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        y = color * alpha + 255.0 * (1.0 - alpha)
        y = y.clip(0, 255).astype(np.uint8)
        return y

def safer_memory(x):
    # Fix many MAC/AMD problems
    return np.ascontiguousarray(x.copy()).copy()

def resize_image_with_pad(input_image, resolution, skip_hwc3=False):
    if skip_hwc3:
        img = input_image
    else:
        img = HWC3(input_image)
    H_raw, W_raw, _ = img.shape
    k = float(resolution) / float(min(H_raw, W_raw))
    interpolation = cv2.INTER_CUBIC if k > 1 else cv2.INTER_AREA
    H_target = int(np.round(float(H_raw) * k))
    W_target = int(np.round(float(W_raw) * k))
    img = cv2.resize(img, (W_target, H_target), interpolation=interpolation)
    H_pad, W_pad = pad64(H_target), pad64(W_target)
    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode='edge')

    def remove_pad(x):
        return safer_memory(x[:H_target, :W_target])

    return safer_memory(img_padded), remove_pad

def scribble_xdog(img, res=512, thr_a=32, **kwargs):
    """
    Generate a sketch image with XDoG
    :param img: np.ndarray, input image
    :param res: int, output image resolution
    :param thr_a: int, threshold

    Returns
    -------
    Image : PIL.Image
    """
    img, remove_pad = resize_image_with_pad(img, res)
    g1 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 0.5)
    g2 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 5.0)
    dog = (255 - np.min(g2 - g1, axis=2)).clip(0, 255).astype(np.uint8)
    result = np.zeros_like(img, dtype=np.uint8)
    result[2 * (255 - dog) > thr_a] = 255
    result = Image.fromarray(remove_pad(result))
    return result, True

def canny(img, res=512, thr_a=100, thr_b=200, **kwargs):
    img, remove_pad = resize_image_with_pad(img, res)
    result = cv2.Canny(img, thr_a, thr_b)
    result = Image.fromarray(remove_pad(result))
    return result, True

def get_sketch(image, method='scribble_xdog', res=2048, thr=20, **kwargs):
    # image: np.ndarray
    input_height = image.shape[0]
    input_width = image.shape[1]

    if method == 'scribble_xdog':
        processed_image, _ = scribble_xdog(image, res, thr)  # PIL.Image
        processed_image = processed_image.resize((input_width, input_height))
        # make PIL.Image to cv2 and INVERSE
        processed_image = cv2.cvtColor(np.array(processed_image), cv2.COLOR_RGB2BGR)
        processed_image = 255 - processed_image
        processed_image = Image.fromarray(processed_image)
    elif method == 'anime2sketch':
        clahe = 1.0
        processed_image = generate_sketch(image, clahe_clip=clahe, load_size=1024)  # output: numpy.ndarray
        processed_image = Image.fromarray(processed_image)
        # processed_image.save(output_path.split('.')[0] + f'_{clahe}.png')
    elif method == 'both':
        alpha = 0.5
        # Blend the two results with weight alpha
        scribble_xdog_processed_image, _ = scribble_xdog(image, res, thr)
        scribble_xdog_processed_image = scribble_xdog_processed_image.resize((input_width, input_height))
        scribble_xdog_processed_image = cv2.cvtColor(np.array(scribble_xdog_processed_image), cv2.COLOR_RGB2BGR)
        scribble_xdog_processed_image = 255 - scribble_xdog_processed_image

        anime2sketch_processed_image = generate_sketch(image, clahe_clip=1.0, load_size=1024)
        anime2sketch_processed_image = Image.fromarray(anime2sketch_processed_image)
        anime2sketch_processed_image = anime2sketch_processed_image.resize((input_width, input_height))
        anime2sketch_processed_image = cv2.cvtColor(np.array(anime2sketch_processed_image), cv2.COLOR_RGB2BGR)

        processed_image = cv2.addWeighted(scribble_xdog_processed_image, alpha, anime2sketch_processed_image, 1 - alpha, 0)
        processed_image = Image.fromarray(processed_image)

    return processed_image
ADDED
@@ -0,0 +1,186 @@
|
import torch
import torch.nn as nn
import torch.nn.functional as F
import functools
from app import download_file

class UnetGenerator(nn.Module):
    """Create a Unet-based generator"""

    def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False):
        """Construct a Unet generator
        Parameters:
            input_nc (int)  -- the number of channels in input images
            output_nc (int) -- the number of channels in output images
            num_downs (int) -- the number of downsamplings in UNet. For example, if |num_downs| == 7,
                               an image of size 128x128 will become of size 1x1 at the bottleneck
            ngf (int)       -- the number of filters in the last conv layer
            norm_layer      -- normalization layer
        We construct the U-Net from the innermost layer to the outermost layer.
        It is a recursive process.
        """
        super(UnetGenerator, self).__init__()
        # construct unet structure
        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True)  # add the innermost layer
        for _ in range(num_downs - 5):  # add intermediate layers with ngf * 8 filters
            unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
        # gradually reduce the number of filters from ngf * 8 to ngf
        unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)  # add the outermost layer

    def forward(self, input):
        """Standard forward"""
        return self.model(input)

class UnetSkipConnectionBlock(nn.Module):
    """Defines the Unet submodule with skip connection.
        X -------------------identity----------------------
        |-- downsampling -- |submodule| -- upsampling --|
    """

    def __init__(self, outer_nc, inner_nc, input_nc=None,
                 submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
        """Construct a Unet submodule with skip connections.
        Parameters:
            outer_nc (int) -- the number of filters in the outer conv layer
            inner_nc (int) -- the number of filters in the inner conv layer
            input_nc (int) -- the number of channels in input images/features
            submodule (UnetSkipConnectionBlock) -- previously defined submodules
            outermost (bool)    -- if this module is the outermost module
            innermost (bool)    -- if this module is the innermost module
            norm_layer          -- normalization layer
            use_dropout (bool)  -- if use dropout layers.
        """
        super(UnetSkipConnectionBlock, self).__init__()
        self.outermost = outermost
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d
        if input_nc is None:
            input_nc = outer_nc
        downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
                             stride=2, padding=1, bias=use_bias)
        downrelu = nn.LeakyReLU(0.2, True)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU(True)
        upnorm = norm_layer(outer_nc)

        if outermost:
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1, bias=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1, bias=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up

        self.model = nn.Sequential(*model)

    def forward(self, x):
        if self.outermost:
            return self.model(x)
        else:  # add skip connections
            return torch.cat([x, self.model(x)], 1)


class Smooth(nn.Module):
    def __init__(self):
        super().__init__()
        kernel = [
            [1, 2, 1],
            [2, 4, 2],
            [1, 2, 1]
        ]
        kernel = torch.tensor([[kernel]], dtype=torch.float)
        kernel /= kernel.sum()
        self.register_buffer('kernel', kernel)
        self.pad = nn.ReplicationPad2d(1)

    def forward(self, x):
        b, c, h, w = x.shape
        x = x.view(-1, 1, h, w)
        x = self.pad(x)
        x = F.conv2d(x, self.kernel)
        return x.view(b, c, h, w)


class Upsample(nn.Module):
    def __init__(self, inc, outc, scale_factor=2):
        super().__init__()
        self.scale_factor = scale_factor
        self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear')
        self.smooth = Smooth()
        self.conv = nn.Conv2d(inc, outc, kernel_size=3, stride=1, padding=1)
        self.mlp = nn.Sequential(
            nn.Conv2d(outc, 4 * outc, kernel_size=1, stride=1, padding=0),
            nn.GELU(),
            nn.Conv2d(4 * outc, outc, kernel_size=1, stride=1, padding=0),
        )

    def forward(self, x):
        x = self.smooth(self.up(x))
        x = self.conv(x)
        x = self.mlp(x) + x
        return x


def create_model(model):
    """Create a model for anime2sketch
    hardcoding the options for simplicity
    """

    norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
    net = UnetGenerator(3, 1, 8, 64, norm_layer=norm_layer, use_dropout=False)

    import os
    cwd = os.getcwd()  # Save the current directory
    os.chdir(os.path.dirname(__file__))  # Move to this file's directory
    if model == 'default':
        model_path = download_file("netG.pth", subfolder="models/Anime2Sketch")
        ckpt = torch.load(model_path)
        for key in list(ckpt.keys()):
            if 'module.' in key:
                ckpt[key.replace('module.', '')] = ckpt[key]
                del ckpt[key]
        net.load_state_dict(ckpt)

        os.chdir(cwd)  # Return to the original directory

    elif model == 'improved':
        ckpt = torch.load('weights/improved.bin', map_location=torch.device('cpu'))
        base = net.model.model[1]

        # swap deconvolution layers with resize + conv layers for 2x upsampling
        for _ in range(6):
            inc, outc = base.model[5].in_channels, base.model[5].out_channels
            base.model[5] = Upsample(inc, outc)
            base = base.model[3]

        net.load_state_dict(ckpt)

        os.chdir(cwd)  # Return to the original directory

    else:
        raise ValueError(f"model should be one of ['default', 'improved'], but got {model}")

    return net
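A minimal smoke-test sketch of the generator architecture in isolation, bypassing create_model's weight download (which requires the Space's private model repo and its environment variables); the input is random data, so the output is untrained noise and only the shapes are meaningful.

# Hypothetical shape check of the U-Net generator defined above; no pretrained weights are loaded.
import functools
import torch
import torch.nn as nn
from model import UnetGenerator  # importing model also pulls in app/process_utils

norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
net = UnetGenerator(3, 1, 8, 64, norm_layer=norm_layer, use_dropout=False).eval()

x = torch.randn(1, 3, 512, 512)  # one RGB image scaled to [-1, 1]
with torch.no_grad():
    y = net(x)                   # single-channel sketch map
print(y.shape)                   # torch.Size([1, 1, 512, 512])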
process_utils.py
ADDED
@@ -0,0 +1,345 @@
import io
import os
import base64
from PIL import Image
import cv2
import numpy as np
from generate_prompt import load_wd14_tagger_model, generate_tags, preprocess_image as wd14_preprocess_image
from lineart_util import scribble_xdog, get_sketch, canny
import torch
from diffusers import StableDiffusionPipeline, StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler, AutoencoderKL
import gc
from peft import PeftModel
from huggingface_hub import hf_hub_download
from dotenv import load_dotenv

load_dotenv()

# Global variables
local_model = False
model = None
# device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32
sotai_gen_pipe = None
refine_gen_pipe = None

def download_file(filename, subfolder=None):
    return hf_hub_download(
        repo_id=os.environ['REPO_ID'],
        filename=filename,
        subfolder=subfolder,
        token=os.environ['HF_TOKEN'],
        cache_dir=os.environ['CACHE_DIR']
    )

def get_file_path(filename, subfolder=None):
    if local_model:
        return os.path.join(subfolder, filename)
    else:
        return download_file(filename, subfolder)

def ensure_rgb(image):
    if image.mode != 'RGB':
        return image.convert('RGB')
    return image

def initialize(_local_model=False):
    global model, sotai_gen_pipe, refine_gen_pipe, local_model

    local_model = _local_model
    model = load_wd14_tagger_model()
    sotai_gen_pipe = initialize_sotai_model()
    refine_gen_pipe = initialize_refine_model()

def load_lora(pipeline, lora_path, alpha=0.75):
    pipeline.load_lora_weights(lora_path)
    pipeline.fuse_lora(lora_scale=alpha)

def initialize_sotai_model():
    global device, torch_dtype

    sotai_sd_model_path = get_file_path(os.environ["sotai_sd_model_name"], subfolder=os.environ["sd_models_dir"])
    controlnet_path1 = get_file_path(os.environ["controlnet_name1"], subfolder=os.environ["controlnet_dir2"])
    controlnet_path2 = get_file_path(os.environ["controlnet_name2"], subfolder=os.environ["controlnet_dir1"])

    # Load the Stable Diffusion model
    sd_pipe = StableDiffusionPipeline.from_single_file(
        sotai_sd_model_path,
        torch_dtype=torch_dtype,
        use_safetensors=True
    ).to(device)

    # Load the ControlNet model
    controlnet1 = ControlNetModel.from_single_file(
        controlnet_path1,
        torch_dtype=torch_dtype
    ).to(device)

    # Load the ControlNet model
    controlnet2 = ControlNetModel.from_single_file(
        controlnet_path2,
        torch_dtype=torch_dtype
    ).to(device)

    # Create the ControlNet pipeline
    sotai_gen_pipe = StableDiffusionControlNetPipeline(
        vae=sd_pipe.vae,
        text_encoder=sd_pipe.text_encoder,
        tokenizer=sd_pipe.tokenizer,
        unet=sd_pipe.unet,
        scheduler=sd_pipe.scheduler,
        safety_checker=sd_pipe.safety_checker,
        feature_extractor=sd_pipe.feature_extractor,
        controlnet=[controlnet1, controlnet2]
    ).to(device)

    # Apply LoRA weights
    lora_names = [
        (os.environ["lora_name1"], 1.0),
        # (os.environ["lora_name2"], 0.3),
    ]

    for lora_name, alpha in lora_names:
        lora_path = get_file_path(lora_name, subfolder=os.environ["lora_dir"])
        load_lora(sotai_gen_pipe, lora_path, alpha)

    # Configure the scheduler
    sotai_gen_pipe.scheduler = UniPCMultistepScheduler.from_config(sotai_gen_pipe.scheduler.config)

    return sotai_gen_pipe

def initialize_refine_model():
    global device, torch_dtype

    refine_sd_model_path = get_file_path(os.environ["refine_sd_model_name"], subfolder=os.environ["sd_models_dir"])
    controlnet_path3 = get_file_path(os.environ["controlnet_name3"], subfolder=os.environ["controlnet_dir1"])
    controlnet_path4 = get_file_path(os.environ["controlnet_name4"], subfolder=os.environ["controlnet_dir1"])
    vae_path = get_file_path(os.environ["vae_name"], subfolder=os.environ["vae_dir"])

    # Load the Stable Diffusion model
    sd_pipe = StableDiffusionPipeline.from_single_file(
        refine_sd_model_path,
        torch_dtype=torch_dtype,
        use_safetensors=True
    ).to(device)

    # controlnet_path = "models/cn/control_v11p_sd15_canny.pth"
    controlnet1 = ControlNetModel.from_single_file(
        controlnet_path3,
        torch_dtype=torch_dtype
    ).to(device)

    # Load the ControlNet model
    controlnet2 = ControlNetModel.from_single_file(
        controlnet_path4,
        torch_dtype=torch_dtype
    ).to(device)

    # Create the ControlNet pipeline
    refine_gen_pipe = StableDiffusionControlNetPipeline(
        vae=AutoencoderKL.from_single_file(vae_path, torch_dtype=torch_dtype).to(device),
        text_encoder=sd_pipe.text_encoder,
        tokenizer=sd_pipe.tokenizer,
        unet=sd_pipe.unet,
        scheduler=sd_pipe.scheduler,
        safety_checker=sd_pipe.safety_checker,
        feature_extractor=sd_pipe.feature_extractor,
        controlnet=[controlnet1, controlnet2],  # use multiple ControlNets
    ).to(device)

    # Configure the scheduler
    refine_gen_pipe.scheduler = UniPCMultistepScheduler.from_config(refine_gen_pipe.scheduler.config)

    return refine_gen_pipe

def get_wd_tags(images: list) -> list:
    global model
    if model is None:
        initialize()
    preprocessed_images = [wd14_preprocess_image(img) for img in images]
    preprocessed_images = np.array(preprocessed_images)
    return generate_tags(preprocessed_images, os.environ["wd_model_name"], model)

def preprocess_image_for_generation(image):
    if isinstance(image, str):  # base64 string
        image = Image.open(io.BytesIO(base64.b64decode(image)))
    elif isinstance(image, np.ndarray):  # numpy array
        image = Image.fromarray(image)
    elif not isinstance(image, Image.Image):
        raise ValueError("Unsupported image type")

    # Compute the output size
    input_width, input_height = image.size
    max_size = 736
    output_width = max_size if input_height < input_width else int(input_width / input_height * max_size)
    output_height = max_size if input_height > input_width else int(input_height / input_width * max_size)

    image = image.resize((output_width, output_height))
    return image, output_width, output_height

def binarize_image(image: Image.Image) -> np.ndarray:
    image = np.array(image.convert('L'))
    # Invert the colors
    image = 255 - image

    # Histogram equalization (CLAHE)
    clahe = cv2.createCLAHE(clipLimit=1.0, tileGridSize=(8, 8))
    image = clahe.apply(image)

    # Apply a Gaussian blur
    image = cv2.GaussianBlur(image, (5, 5), 0)

    # Adaptive thresholding
    binary_image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 9, -8)

    return binary_image

def create_rgba_image(binary_image: np.ndarray, color: list) -> Image.Image:
    rgba_image = np.zeros((binary_image.shape[0], binary_image.shape[1], 4), dtype=np.uint8)
    rgba_image[:, :, 0] = color[0]
    rgba_image[:, :, 1] = color[1]
    rgba_image[:, :, 2] = color[2]
    rgba_image[:, :, 3] = binary_image
    return Image.fromarray(rgba_image, 'RGBA')

def generate_sotai_image(input_image: Image.Image, output_width: int, output_height: int) -> Image.Image:
    input_image = ensure_rgb(input_image)
    global sotai_gen_pipe
    if sotai_gen_pipe is None:
        initialize()

    prompt = "anime pose, girl, (white background:1.5), (monochrome:1.5), full body, sketch, eyes, breasts, (slim legs, skinny legs:1.2)"
    try:
        # Resize the input image
        if input_image.size[0] > input_image.size[1]:
            input_image = input_image.resize((512, int(512 * input_image.size[1] / input_image.size[0])))
        else:
            input_image = input_image.resize((int(512 * input_image.size[0] / input_image.size[1]), 512))

        # Contents of EasyNegativeV2
        easy_negative_v2 = "(worst quality, low quality, normal quality:1.4), lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry, artist name, (bad_prompt_version2:0.8)"

        output = sotai_gen_pipe(
            prompt,
            image=[input_image, input_image],
            negative_prompt=f"(wings:1.6), (clothes, garment, lighting, gray, missing limb, extra line, extra limb, extra arm, extra legs, hair, bangs, fringe, forelock, front hair, fill:1.4), (ink pool:1.6)",
            # negative_prompt=f"{easy_negative_v2}, (wings:1.6), (clothes, garment, lighting, gray, missing limb, extra line, extra limb, extra arm, extra legs, hair, bangs, fringe, forelock, front hair, fill:1.4), (ink pool:1.6)",
            num_inference_steps=40,
            guidance_scale=8,
            width=output_width,
            height=output_height,
            denoising_strength=0.13,
            num_images_per_prompt=1,  # Equivalent to batch_size
            guess_mode=[True, True],  # Equivalent to pixel_perfect
            controlnet_conditioning_scale=[1.2, 1.3],  # weight for each ControlNet
            guidance_start=[0.0, 0.0],
            guidance_end=[1.0, 1.0],
        )
        generated_image = output.images[0]

        return generated_image

    finally:
        # Free memory
        if device == "cuda":
            torch.cuda.empty_cache()
        gc.collect()

def generate_refined_image(prompt: str, original_image: Image.Image, output_width: int, output_height: int, weight1: float, weight2: float) -> Image.Image:
    original_image = ensure_rgb(original_image)
    global refine_gen_pipe
    if refine_gen_pipe is None:
        initialize()

    try:
        original_image_np = np.array(original_image)
        # scribble_xdog
        scribble_image, _ = scribble_xdog(original_image_np, 2048, 20)

        original_image = original_image.resize((output_width, output_height))
        output = refine_gen_pipe(
            prompt,
            image=[scribble_image, original_image],  # input images for the two ControlNets
            negative_prompt="extra limb, monochrome, black and white",
            num_inference_steps=20,
            width=output_width,
            height=output_height,
            controlnet_conditioning_scale=[weight1, weight2],  # weight for each ControlNet
            control_guidance_start=[0.0, 0.0],
            control_guidance_end=[1.0, 1.0],
            guess_mode=[False, False],  # pixel_perfect
        )
        generated_image = output.images[0]

        return generated_image

    finally:
        # Free memory
        if device == "cuda":
            torch.cuda.empty_cache()
        gc.collect()

def process_image(input_image, mode: str, weight1: float = 0.4, weight2: float = 0.3):
    input_image = ensure_rgb(input_image)
    # Get the size
    input_width, input_height = input_image.size
    max_size = 736
    output_width = max_size if input_height < input_width else int(input_width / input_height * max_size)
    output_height = max_size if input_height > input_width else int(input_height / input_width * max_size)

    if mode == "refine":
        # Generate a prompt with the WD-14 tagger
        image_np = np.array(ensure_rgb(input_image))
        prompt = get_wd_tags([image_np])[0]
        prompt = f"{prompt}"
        print(prompt)

        refined_image = generate_refined_image(prompt, input_image, output_width, output_height, weight1, weight2)
        refined_image = refined_image.convert('RGB')

        # Generate the sketch image
        refined_image_np = np.array(refined_image)
        sketch_image = get_sketch(refined_image_np, "both", 2048, 10)
        sketch_image = sketch_image.resize((output_width, output_height))  # match the output size
        # Binarize the sketch image
        sketch_binary = binarize_image(sketch_image)
        # Convert to RGBA (transparent base image) with blue lines
        sketch_image = create_rgba_image(sketch_binary, [0, 0, 255])

        # Generate the body (sotai) image
        sotai_image = generate_sotai_image(refined_image, output_width, output_height)

    elif mode == "original":
        sotai_image = generate_sotai_image(input_image, output_width, output_height)

        # Generate the sketch image
        input_image_np = np.array(input_image)
        sketch_image = get_sketch(input_image_np, "both", 2048, 16)

    elif mode == "sketch":
        # Generate the sketch image
        input_image_np = np.array(input_image)
        sketch_image = get_sketch(input_image_np, "both", 2048, 16)

        # Generate the body (sotai) image
        sotai_image = generate_sotai_image(sketch_image, output_width, output_height)

    else:
        raise ValueError("Invalid mode")

    # Binarize the body image
    sotai_binary = binarize_image(sotai_image)
    # Convert to RGBA (transparent base image) with red lines
    sotai_image = create_rgba_image(sotai_binary, [255, 0, 0])

    return sotai_image, sketch_image

def image_to_base64(img_array):
    buffered = io.BytesIO()
    img_array.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode()

def process_image_as_base64(input_image, mode: str, weight1: float = 0.4, weight2: float = 0.3):
    sotai_image, sketch_image = process_image(input_image, mode, weight1, weight2)
    return image_to_base64(sotai_image), image_to_base64(sketch_image)
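process_utils resolves every model file through environment variables loaded with python-dotenv, so the Space needs a .env file or Space secrets defining them. A small hedged checker sketch; the names below are only those that appear in the code above, and their values are repo-specific (not part of this commit).

# Sanity check for the environment variables assumed by process_utils; values are repo-specific.
import os

required_vars = [
    "REPO_ID", "HF_TOKEN", "CACHE_DIR",
    "sotai_sd_model_name", "refine_sd_model_name", "sd_models_dir",
    "controlnet_name1", "controlnet_name2", "controlnet_name3", "controlnet_name4",
    "controlnet_dir1", "controlnet_dir2",
    "lora_name1", "lora_dir", "vae_name", "vae_dir", "wd_model_name",
]
missing = [name for name in required_vars if name not in os.environ]
if missing:
    raise RuntimeError(f"Missing environment variables: {missing}")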
requirements.txt
ADDED
@@ -0,0 +1,19 @@
--extra-index-url https://download.pytorch.org/whl/cu116
torch==2.2.0
torchvision==0.17.0
torchaudio==2.2.0
diffusers==0.29.1
Flask==3.0.3
Flask-Cors==4.0.0
gradio==4.36.1
huggingface_hub==0.23.2
kornia==0.7.1
numpy==1.23.5
opencv-python==4.9.0.80
Pillow==10.3.0
Requests==2.32.3
tensorflow==2.16.1
transforms==0.2.1
tokenizers
pytorch_lightning
python-dotenv
templates/index.html
ADDED
@@ -0,0 +1,60 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Scribble Image Generator</title>
    <style>
        body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
        .image-container { display: flex; justify-content: space-between; margin-top: 20px; }
        .image-container img { max-width: 48%; height: auto; }
        #loading { display: none; }
    </style>
</head>
<body>
    <h1>Scribble Image Generator</h1>
    <form id="upload-form">
        <input type="file" id="file-input" accept="image/*" required>
        <br><br>
        <label for="threshold">Threshold:</label>
        <input type="number" id="threshold" name="threshold" value="20" min="1" max="64">
        <br><br>
        <label for="processor_res">Processor Resolution:</label>
        <input type="number" id="processor_res" name="processor_res" value="2048" min="64" max="2048">
        <br><br>
        <button type="submit">Generate Scribble</button>
    </form>
    <div id="loading">Processing...</div>
    <div class="image-container">
        <img id="original-image" alt="Original Image">
        <img id="scribble-image" alt="Scribble Image">
    </div>

    <script>
        document.getElementById('upload-form').addEventListener('submit', function(e) {
            e.preventDefault();
            var formData = new FormData();
            formData.append('file', document.getElementById('file-input').files[0]);
            formData.append('threshold', document.getElementById('threshold').value);
            formData.append('processor_res', document.getElementById('processor_res').value);

            document.getElementById('loading').style.display = 'block';

            fetch('/process', {
                method: 'POST',
                body: formData
            })
            .then(response => response.json())
            .then(data => {
                document.getElementById('original-image').src = 'data:image/png;base64,' + data.original_image;
                document.getElementById('scribble-image').src = 'data:image/png;base64,' + data.scribble_image;
                document.getElementById('loading').style.display = 'none';
            })
            .catch(error => {
                console.error('Error:', error);
                document.getElementById('loading').style.display = 'none';
            });
        });
    </script>
</body>
</html>