Spaces:
Running
on
Zero
Running
on
Zero
fancyfeast
committed on
Commit
·
ccb684e
1
Parent(s):
301ae18
Tweak UI
Browse files
- JoyCaptionLogo1.svg +10 -0
- app.py +23 -7
JoyCaptionLogo1.svg
ADDED
|
app.py
CHANGED
@@ -13,12 +13,14 @@ TITLE = """<style>
|
|
13 |
gap:16px; margin:4px 0 12px;}
|
14 |
.joy-header h1{margin:0; font-size:1.9rem; line-height:1.2;}
|
15 |
.joy-header p {margin:2px 0 0; font-size:0.9rem; color:#666;}
|
|
|
16 |
</style>
|
17 |
|
18 |
<div class="joy-header">
|
|
|
19 |
<div>
|
20 |
<h1>JoyCaption <span style="font-weight:400">Beta One</span></h1>
|
21 |
-
<p>Image-captioning model | build
|
22 |
</div>
|
23 |
</div>
|
24 |
<hr>"""
|
@@ -46,12 +48,12 @@ DESCRIPTION = """
|
|
46 |
<tr><td><strong>Straightforward</strong></td>
|
47 |
<td>Objective, no fluff, and more succinct than Descriptive.</td></tr>
|
48 |
<tr><td><strong>Stable Diffusion Prompt</strong></td>
|
49 |
-
<td>Reverse-engineers a prompt that could have produced the image in a SD/T2I model.<br><em>⚠︎ Experimental – can glitch ≈ 3
|
50 |
<tr><td><strong>MidJourney</strong></td>
|
51 |
-
<td>Same idea as above but tuned to MidJourney’s prompt style.<br><em>⚠︎ Experimental – can glitch ≈ 3
|
52 |
<tr><td><strong>Danbooru tag list</strong></td>
|
53 |
<td>Comma-separated tags strictly following Danbooru conventions
|
54 |
-
(artist:, copyright:, etc.). Lower-case underscores only.<br><em>⚠︎ Experimental – can glitch ≈ 3
|
55 |
<tr><td><strong>e621 tag list</strong></td>
|
56 |
<td>Alphabetical, namespaced tags in e621 style – includes species/meta
|
57 |
tags when relevant.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
|
@@ -59,7 +61,7 @@ DESCRIPTION = """
|
|
59 |
<td>Rule34 style alphabetical tag dump; artist/copyright/character
|
60 |
prefixes first.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
|
61 |
<tr><td><strong>Booru-like tag list</strong></td>
|
62 |
-
<td>Looser tag list when you want labels but not a specific Booru format.<br><em>⚠︎ Experimental – can glitch ≈ 3
|
63 |
<tr><td><strong>Art Critic</strong></td>
|
64 |
<td>Paragraph of art-historical commentary: composition, symbolism, style,
|
65 |
lighting, movement, etc.</td></tr>
|
@@ -163,6 +165,8 @@ CAPTION_TYPE_MAP = {
|
|
163 |
"Write a {length} caption for this image as if it were being used for a social media post.",
|
164 |
],
|
165 |
}
|
|
|
|
|
166 |
|
167 |
|
168 |
|
@@ -194,6 +198,11 @@ def build_prompt(caption_type: str, caption_length: str | int, extra_options: li
|
|
194 |
)
|
195 |
|
196 |
|
|
|
|
|
|
|
|
|
|
|
197 |
@spaces.GPU()
|
198 |
@torch.no_grad()
|
199 |
def chat_joycaption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
|
@@ -271,7 +280,7 @@ with gr.Blocks() as demo:
|
|
271 |
with gr.Accordion("Extra Options", open=False):
|
272 |
extra_options = gr.CheckboxGroup(
|
273 |
choices=[
|
274 |
-
|
275 |
"Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
|
276 |
"Include information about lighting.",
|
277 |
"Include information about camera angle.",
|
@@ -302,7 +311,7 @@ with gr.Blocks() as demo:
|
|
302 |
label="Select one or more",
|
303 |
)
|
304 |
|
305 |
-
name_input = gr.Textbox(label="Person / Character Name")
|
306 |
|
307 |
with gr.Accordion("Generation settings", open=False):
|
308 |
temperature_slider = gr.Slider(
|
@@ -325,6 +334,13 @@ with gr.Blocks() as demo:
|
|
325 |
with gr.Column():
|
326 |
prompt_box = gr.Textbox(lines=4, label="Prompt", interactive=True)
|
327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
# Auto-update prompt box whenever any of the inputs change
|
329 |
for ctrl in (caption_type, caption_length, extra_options, name_input):
|
330 |
ctrl.change(
|
|
|
13 |
gap:16px; margin:4px 0 12px;}
|
14 |
.joy-header h1{margin:0; font-size:1.9rem; line-height:1.2;}
|
15 |
.joy-header p {margin:2px 0 0; font-size:0.9rem; color:#666;}
|
16 |
+
.joy-header img{height:56px;}
|
17 |
</style>
|
18 |
|
19 |
<div class="joy-header">
|
20 |
+
<img src="logo.svg" alt="JoyCaption logo">
|
21 |
<div>
|
22 |
<h1>JoyCaption <span style="font-weight:400">Beta One</span></h1>
|
23 |
+
<p>Image-captioning model | build mb3500zp</p>
|
24 |
</div>
|
25 |
</div>
|
26 |
<hr>"""
|
|
|
48 |
<tr><td><strong>Straightforward</strong></td>
|
49 |
<td>Objective, no fluff, and more succinct than Descriptive.</td></tr>
|
50 |
<tr><td><strong>Stable Diffusion Prompt</strong></td>
|
51 |
+
<td>Reverse-engineers a prompt that could have produced the image in a SD/T2I model.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time.</em></td></tr>
|
52 |
<tr><td><strong>MidJourney</strong></td>
|
53 |
+
<td>Same idea as above but tuned to MidJourney’s prompt style.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time.</em></td></tr>
|
54 |
<tr><td><strong>Danbooru tag list</strong></td>
|
55 |
<td>Comma-separated tags strictly following Danbooru conventions
|
56 |
+
(artist:, copyright:, etc.). Lower-case underscores only.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time..</em></td></tr>
|
57 |
<tr><td><strong>e621 tag list</strong></td>
|
58 |
<td>Alphabetical, namespaced tags in e621 style – includes species/meta
|
59 |
tags when relevant.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
|
|
|
61 |
<td>Rule34 style alphabetical tag dump; artist/copyright/character
|
62 |
prefixes first.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
|
63 |
<tr><td><strong>Booru-like tag list</strong></td>
|
64 |
+
<td>Looser tag list when you want labels but not a specific Booru format.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time..</em></td></tr>
|
65 |
<tr><td><strong>Art Critic</strong></td>
|
66 |
<td>Paragraph of art-historical commentary: composition, symbolism, style,
|
67 |
lighting, movement, etc.</td></tr>
|
|
|
165 |
"Write a {length} caption for this image as if it were being used for a social media post.",
|
166 |
],
|
167 |
}
|
168 |
+
NAME_OPTION = "If there is a person/character in the image you must refer to them as {name}."
|
169 |
+
|
170 |
|
171 |
|
172 |
|
|
|
198 |
)
|
199 |
|
200 |
|
201 |
+
def toggle_name_box(selected_options: list[str]):
|
202 |
+
"""Show the name textbox only when the specific option is selected."""
|
203 |
+
return gr.update(visible=NAME_OPTION in selected_options)
|
204 |
+
|
205 |
+
|
206 |
@spaces.GPU()
|
207 |
@torch.no_grad()
|
208 |
def chat_joycaption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
|
|
|
280 |
with gr.Accordion("Extra Options", open=False):
|
281 |
extra_options = gr.CheckboxGroup(
|
282 |
choices=[
|
283 |
+
NAME_OPTION,
|
284 |
"Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
|
285 |
"Include information about lighting.",
|
286 |
"Include information about camera angle.",
|
|
|
311 |
label="Select one or more",
|
312 |
)
|
313 |
|
314 |
+
name_input = gr.Textbox(label="Person / Character Name", visible=False)
|
315 |
|
316 |
with gr.Accordion("Generation settings", open=False):
|
317 |
temperature_slider = gr.Slider(
|
|
|
334 |
with gr.Column():
|
335 |
prompt_box = gr.Textbox(lines=4, label="Prompt", interactive=True)
|
336 |
|
337 |
+
# Show the name input box only when the specific option is selected
|
338 |
+
extra_options.change(
|
339 |
+
toggle_name_box,
|
340 |
+
inputs=extra_options,
|
341 |
+
outputs=name_input,
|
342 |
+
)
|
343 |
+
|
344 |
# Auto-update prompt box whenever any of the inputs change
|
345 |
for ctrl in (caption_type, caption_length, extra_options, name_input):
|
346 |
ctrl.change(
|