Spaces:
Sleeping
Sleeping
File size: 5,219 Bytes
495db77 0be6001 b26b6ee 0be6001 b26b6ee c71ddef 42f5a78 c71ddef b26b6ee c71ddef b26b6ee 0be6001 c71ddef b26b6ee 0be6001 b26b6ee 0be6001 42f5a78 c71ddef 42f5a78 c71ddef 42f5a78 c71ddef 70c9623 c71ddef 70c9623 c71ddef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import os
import re
from functools import lru_cache
from typing import List, Mapping, Tuple
import gradio as gr
import numpy as np
import onnxruntime as ort
from PIL import Image
from huggingface_hub import hf_hub_download
import io
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
import uvicorn
# FastAPI application object. The /tagging route below is registered on it,
# and the Gradio UI is mounted onto it near the end of the file.
app = FastAPI()
def _yield_tags_from_txt_file(txt_file: str):
    """Yield the tag names stored one per line in a text file.

    Every line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped.

    :param txt_file: Path of the tag list file (read as UTF-8).
    :return: Generator producing the tag names in file order.
    """
    with open(txt_file, 'r', encoding='utf-8') as f:
        for line in f:
            # A line read from a file still carries its trailing newline, so
            # it is never falsy on its own: strip first, then test.
            tag = line.strip()
            if tag:
                yield tag
@lru_cache()
def get_deepdanbooru_tags() -> List[str]:
    """Return the tag names listed in ``tags.txt`` of ``chinoll/deepdanbooru``.

    The file location is obtained through ``hf_hub_download`` and parsed with
    ``_yield_tags_from_txt_file``. Because of ``lru_cache`` the work is done
    on the first call only; later calls return the same list object.
    """
    tags_path = hf_hub_download('chinoll/deepdanbooru', 'tags.txt')
    tag_names: List[str] = []
    tag_names.extend(_yield_tags_from_txt_file(tags_path))
    return tag_names
@lru_cache()
def get_deepdanbooru_onnx() -> ort.InferenceSession:
    """Return an ONNX Runtime session for ``deepdanbooru.onnx``.

    The model file location is obtained through ``hf_hub_download`` from the
    ``chinoll/deepdanbooru`` repository. ``lru_cache`` keeps the session, so
    it is created once and reused by every later call.
    """
    model_path = hf_hub_download('chinoll/deepdanbooru', 'deepdanbooru.onnx')
    session = ort.InferenceSession(model_path)
    return session
def image_preprocess(image: Image.Image, size: int = 512) -> np.ndarray:
    """Turn an image into the square, zero-padded float batch fed to the model.

    The image is converted to RGB if necessary, scaled (keeping its aspect
    ratio) so that its longer side fits ``size`` pixels, divided by 255 and
    padded with zeros on both sides of each axis up to ``size`` x ``size``.

    :param image: Source image.
    :param size: Side length of the square output; defaults to 512.
    :return: ``float32`` array of shape ``(1, size, size, 3)`` (B x H x W x C).
    :raises ValueError: If the image has a zero width or height.
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')

    o_width, o_height = image.size
    if o_width <= 0 or o_height <= 0:
        # Without this guard the scale computation divides by zero.
        raise ValueError(f'Cannot preprocess an empty image ({o_width}x{o_height}).')

    scale = float(size) / max(o_width, o_height)
    # Clamp each side to [1, size]: plain truncation gives the short side a
    # length of 0 on extreme aspect ratios (e.g. 1x2000), which resize() rejects.
    f_width, f_height = (
        min(size, max(1, int(side * scale))) for side in (o_width, o_height)
    )
    image = image.resize((f_width, f_height))
    data = np.asarray(image).astype(np.float32) / 255  # H x W x C

    # Distribute the missing rows/columns over both sides; when the amount is
    # odd the extra one goes to the bottom/right.
    height_pad_left = (size - f_height) // 2
    height_pad_right = size - f_height - height_pad_left
    width_pad_left = (size - f_width) // 2
    width_pad_right = size - f_width - width_pad_left
    data = np.pad(
        data,
        ((height_pad_left, height_pad_right), (width_pad_left, width_pad_right), (0, 0)),
        mode='constant',
        constant_values=0.0
    )

    # Internal invariant of the padding arithmetic above.
    assert data.shape == (size, size, 3), \
        f'Shape ({size}, {size}, 3) expected, but {data.shape!r} found.'
    return data.reshape((1, size, size, 3))  # B x H x W x C
# Matches a single backslash or parenthesis. The character is captured so a
# substitution can re-emit it behind an escaping backslash.
RE_SPECIAL = re.compile(r'([\\()])')
def image_to_deepdanbooru_tags(
    image: Image.Image,
    threshold: float,
    use_spaces: bool,
    use_escape: bool,
    include_ranks: bool,
    score_descend: bool
) -> Tuple[str, Mapping[str, float]]:
    """Run the tagger on an image and return the tags as text and as a mapping.

    :param image: Image to tag.
    :param threshold: Minimum score a tag needs in order to be kept.
    :param use_spaces: Replace ``_`` with a space in the exported text.
    :param use_escape: Prefix backslashes and parentheses in the exported
        text with a backslash.
    :param include_ranks: Export each tag as ``(tag:score)`` with the score
        printed to three decimals.
    :param score_descend: Order the exported text by descending score (ties
        broken by tag name) instead of by tag-list order.
    :return: Tuple of the comma-separated exported text and a mapping from
        each kept tag to its score. The mapping keeps tag-list order
        regardless of ``score_descend``.
    """
    tags = get_deepdanbooru_tags()
    model = get_deepdanbooru_onnx()

    feed_name = model.get_inputs()[0].name
    fetch_names = [node.name for node in model.get_outputs()]
    outputs = model.run(fetch_names, {feed_name: image_preprocess(image)})
    # Only the first output is used; its first row belongs to the single
    # image in the batch.
    scores = outputs[0][0]

    filtered_tags = {}
    for tag, score in zip(tags, scores):
        if score >= threshold:
            filtered_tags[tag] = float(score)

    if score_descend:
        ordered = sorted(filtered_tags.items(), key=lambda pair: (-pair[1], pair[0]))
    else:
        ordered = filtered_tags.items()

    def _render(tag: str, score: float) -> str:
        # Apply the requested text transformations to one tag, in the fixed
        # order: spaces, escaping, then the optional score wrapper.
        tag_outformat = tag.replace('_', ' ') if use_spaces else tag
        if use_escape:
            tag_outformat = RE_SPECIAL.sub(r'\\\1', tag_outformat)
        if include_ranks:
            tag_outformat = f"({tag_outformat}:{score:.3f})"
        return tag_outformat

    output_text = ', '.join(_render(tag, score) for tag, score in ordered)
    return output_text, filtered_tags
from typing import Optional
@app.post("/tagging")
async def tagging_endpoint(
    image: UploadFile = File(...),
    threshold: Optional[float] = Form(0.5)
):
    """Tag an uploaded image and return the tag names as JSON.

    :param image: Uploaded image file.
    :param threshold: Minimum score for a tag to be reported. ``None`` is
        treated as the default of 0.5.
    :return: ``{"tags": [...]}`` with the tags ordered by descending score
        (ties broken by tag name), or a 400 response with an ``error``
        message when the upload cannot be decoded as an image.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level imports (fastapi itself is already a dependency).
    from fastapi.concurrency import run_in_threadpool

    image_data = await image.read()
    try:
        pil_image = Image.open(io.BytesIO(image_data)).convert("RGB")
    except OSError:
        # PIL reports unidentifiable or truncated image data through OSError
        # (sub)classes; that is a client error, not a server failure.
        return JSONResponse(
            status_code=400,
            content={"error": "The uploaded file could not be read as an image."},
        )

    if threshold is None:
        threshold = 0.5

    # Inference is blocking work; run it in the thread pool so the event
    # loop (which also serves the mounted UI) stays responsive.
    _, filtered_tags = await run_in_threadpool(
        image_to_deepdanbooru_tags,
        pil_image,
        threshold=threshold,
        use_spaces=False,
        use_escape=False,
        include_ranks=False,
        score_descend=True,
    )

    # The returned mapping keeps tag-list order, so the descending-score
    # order has to be applied here for the response to reflect it.
    ranked = sorted(filtered_tags.items(), key=lambda item: (-item[1], item[0]))
    tags = [tag for tag, _ in ranked]
    return JSONResponse(content={"tags": tags})
def gradio_interface(
    image: Image.Image,
    threshold: float,
    use_spaces: bool,
    use_escape: bool,
    include_ranks: bool,
    score_descend: bool
):
    """Callback for the UI: forward the inputs to ``image_to_deepdanbooru_tags``.

    :return: Tuple of the exported text and the tag-to-score mapping, in
        that order.
    """
    result = image_to_deepdanbooru_tags(
        image,
        threshold,
        use_spaces,
        use_escape,
        include_ranks,
        score_descend,
    )
    exported_text, tag_scores = result
    return exported_text, tag_scores
# Gradio UI: inputs and options in the left column, results in the right one.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Image is handed to the callback as a PIL image (type='pil').
            gr_input_image = gr.Image(type='pil', label='Original Image')
            gr_threshold = gr.Slider(0.0, 1.0, 0.5, label='Tagging Confidence Threshold')
            # Formatting options for the exported text.
            with gr.Row():
                gr_space = gr.Checkbox(value=False, label='Use Space Instead Of _')
                gr_escape = gr.Checkbox(value=True, label='Use Text Escape')
                gr_confidence = gr.Checkbox(value=False, label='Keep Confidences')
                gr_order = gr.Checkbox(value=True, label='Descend By Confidence')
            gr_btn_submit = gr.Button(value='Tagging', variant='primary')
        with gr.Column():
            with gr.Tabs():
                with gr.Tab("Tags"):
                    gr_tags = gr.Label(label='Tags')
                with gr.Tab("Exported Text"):
                    gr_output_text = gr.TextArea(label='Exported Text')
    # The input list follows the parameter order of gradio_interface; the
    # output list follows the (text, tag mapping) order of its return value.
    gr_btn_submit.click(
        gradio_interface,
        inputs=[gr_input_image, gr_threshold, gr_space, gr_escape, gr_confidence, gr_order],
        outputs=[gr_output_text, gr_tags],
    )
# Mount the Gradio UI onto the FastAPI application at the root path; the
# object returned by mount_gradio_app replaces ``app``.
app = gr.mount_gradio_app(app, demo, path="/")
if __name__ == '__main__':
    # When executed as a script, serve the combined application with uvicorn
    # on host 0.0.0.0, port 7860.
    uvicorn.run(app, host='0.0.0.0', port=7860)
|