fancyfeast committed on
Commit
ccb684e
·
1 Parent(s): 301ae18
Files changed (2) hide show
  1. JoyCaptionLogo1.svg +10 -0
  2. app.py +23 -7
JoyCaptionLogo1.svg ADDED
app.py CHANGED
@@ -13,12 +13,14 @@ TITLE = """<style>
13
  gap:16px; margin:4px 0 12px;}
14
  .joy-header h1{margin:0; font-size:1.9rem; line-height:1.2;}
15
  .joy-header p {margin:2px 0 0; font-size:0.9rem; color:#666;}
 
16
  </style>
17
 
18
  <div class="joy-header">
 
19
  <div>
20
  <h1>JoyCaption <span style="font-weight:400">Beta&nbsp;One</span></h1>
21
- <p>Image-captioning model &nbsp;|&nbsp; build 2025-05-10a</p>
22
  </div>
23
  </div>
24
  <hr>"""
@@ -46,12 +48,12 @@ DESCRIPTION = """
46
  <tr><td><strong>Straightforward</strong></td>
47
  <td>Objective, no fluff, and more succinct than Descriptive.</td></tr>
48
  <tr><td><strong>Stable Diffusion Prompt</strong></td>
49
- <td>Reverse-engineers a prompt that could have produced the image in a SD/T2I model.<br><em>⚠︎ Experimental – can glitch ≈ 3 % of the time.</em></td></tr>
50
  <tr><td><strong>MidJourney</strong></td>
51
- <td>Same idea as above but tuned to MidJourney’s prompt style.<br><em>⚠︎ Experimental – can glitch ≈ 3 % of the time.</em></td></tr>
52
  <tr><td><strong>Danbooru tag list</strong></td>
53
  <td>Comma-separated tags strictly following Danbooru conventions
54
- (artist:, copyright:, etc.). Lower-case underscores only.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
55
  <tr><td><strong>e621 tag list</strong></td>
56
  <td>Alphabetical, namespaced tags in e621 style – includes species/meta
57
  tags when relevant.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
@@ -59,7 +61,7 @@ DESCRIPTION = """
59
  <td>Rule34 style alphabetical tag dump; artist/copyright/character
60
  prefixes first.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
61
  <tr><td><strong>Booru-like tag list</strong></td>
62
- <td>Looser tag list when you want labels but not a specific Booru format.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
63
  <tr><td><strong>Art Critic</strong></td>
64
  <td>Paragraph of art-historical commentary: composition, symbolism, style,
65
  lighting, movement, etc.</td></tr>
@@ -163,6 +165,8 @@ CAPTION_TYPE_MAP = {
163
  "Write a {length} caption for this image as if it were being used for a social media post.",
164
  ],
165
  }
 
 
166
 
167
 
168
 
@@ -194,6 +198,11 @@ def build_prompt(caption_type: str, caption_length: str | int, extra_options: li
194
  )
195
 
196
 
 
 
 
 
 
197
  @spaces.GPU()
198
  @torch.no_grad()
199
  def chat_joycaption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
@@ -271,7 +280,7 @@ with gr.Blocks() as demo:
271
  with gr.Accordion("Extra Options", open=False):
272
  extra_options = gr.CheckboxGroup(
273
  choices=[
274
- "If there is a person/character in the image you must refer to them as {name}.",
275
  "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
276
  "Include information about lighting.",
277
  "Include information about camera angle.",
@@ -302,7 +311,7 @@ with gr.Blocks() as demo:
302
  label="Select one or more",
303
  )
304
 
305
- name_input = gr.Textbox(label="Person / Character Name")
306
 
307
  with gr.Accordion("Generation settings", open=False):
308
  temperature_slider = gr.Slider(
@@ -325,6 +334,13 @@ with gr.Blocks() as demo:
325
  with gr.Column():
326
  prompt_box = gr.Textbox(lines=4, label="Prompt", interactive=True)
327
 
 
 
 
 
 
 
 
328
  # Auto-update prompt box whenever any of the inputs change
329
  for ctrl in (caption_type, caption_length, extra_options, name_input):
330
  ctrl.change(
 
13
  gap:16px; margin:4px 0 12px;}
14
  .joy-header h1{margin:0; font-size:1.9rem; line-height:1.2;}
15
  .joy-header p {margin:2px 0 0; font-size:0.9rem; color:#666;}
16
+ .joy-header img{height:56px;}
17
  </style>
18
 
19
  <div class="joy-header">
20
+ <img src="logo.svg" alt="JoyCaption logo">
21
  <div>
22
  <h1>JoyCaption <span style="font-weight:400">Beta&nbsp;One</span></h1>
23
+ <p>Image-captioning model &nbsp;|&nbsp; build mb3500zp</p>
24
  </div>
25
  </div>
26
  <hr>"""
 
48
  <tr><td><strong>Straightforward</strong></td>
49
  <td>Objective, no fluff, and more succinct than Descriptive.</td></tr>
50
  <tr><td><strong>Stable Diffusion Prompt</strong></td>
51
+ <td>Reverse-engineers a prompt that could have produced the image in a SD/T2I model.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time.</em></td></tr>
52
  <tr><td><strong>MidJourney</strong></td>
53
+ <td>Same idea as above but tuned to MidJourney’s prompt style.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time.</em></td></tr>
54
  <tr><td><strong>Danbooru tag list</strong></td>
55
  <td>Comma-separated tags strictly following Danbooru conventions
56
+ (artist:, copyright:, etc.). Lower-case underscores only.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time.</em></td></tr>
57
  <tr><td><strong>e621 tag list</strong></td>
58
  <td>Alphabetical, namespaced tags in e621 style – includes species/meta
59
  tags when relevant.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
 
61
  <td>Rule34 style alphabetical tag dump; artist/copyright/character
62
  prefixes first.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
63
  <tr><td><strong>Booru-like tag list</strong></td>
64
+ <td>Looser tag list when you want labels but not a specific Booru format.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time.</em></td></tr>
65
  <tr><td><strong>Art Critic</strong></td>
66
  <td>Paragraph of art-historical commentary: composition, symbolism, style,
67
  lighting, movement, etc.</td></tr>
 
165
  "Write a {length} caption for this image as if it were being used for a social media post.",
166
  ],
167
  }
168
+ NAME_OPTION = "If there is a person/character in the image you must refer to them as {name}."
169
+
170
 
171
 
172
 
 
198
  )
199
 
200
 
201
+ def toggle_name_box(selected_options: list[str]):
202
+ """Show the name textbox only when the specific option is selected."""
203
+ return gr.update(visible=NAME_OPTION in selected_options)
204
+
205
+
206
  @spaces.GPU()
207
  @torch.no_grad()
208
  def chat_joycaption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
 
280
  with gr.Accordion("Extra Options", open=False):
281
  extra_options = gr.CheckboxGroup(
282
  choices=[
283
+ NAME_OPTION,
284
  "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
285
  "Include information about lighting.",
286
  "Include information about camera angle.",
 
311
  label="Select one or more",
312
  )
313
 
314
+ name_input = gr.Textbox(label="Person / Character Name", visible=False)
315
 
316
  with gr.Accordion("Generation settings", open=False):
317
  temperature_slider = gr.Slider(
 
334
  with gr.Column():
335
  prompt_box = gr.Textbox(lines=4, label="Prompt", interactive=True)
336
 
337
+ # Show the name input box only when the specific option is selected
338
+ extra_options.change(
339
+ toggle_name_box,
340
+ inputs=extra_options,
341
+ outputs=name_input,
342
+ )
343
+
344
  # Auto-update prompt box whenever any of the inputs change
345
  for ctrl in (caption_type, caption_length, extra_options, name_input):
346
  ctrl.change(