DigiP-AI committed on
Commit
21e32dc
·
verified ·
1 Parent(s): efba86c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -39
app.py CHANGED
@@ -293,6 +293,47 @@ css = """
293
  # Gradio interface
294
  with gr.Blocks(theme=theme, css=css) as app:
295
  gr.HTML("<center><h6>🎨 Image Studio</h6></center>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  with gr.Tab("Text to Image"):
297
  #gr.load("models/digiplay/AnalogMadness-realistic-model-v7")
298
  #gr.load("models/XLabs-AI/flux-RealismLora")
@@ -338,45 +379,7 @@ with gr.Blocks(theme=theme, css=css) as app:
338
  outputs=image_output
339
  )
340
 
341
- with gr.Tab("Image to Prompt"):
342
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
343
-
344
- # Initialize Florence model
345
- device = "cuda" if torch.cuda.is_available() else "cpu"
346
- florence_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
347
- florence_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)
348
-
349
- # api_key = os.getenv("HF_READ_TOKEN")
350
-
351
- def generate_caption(image):
352
- if not isinstance(image, Image.Image):
353
- image = Image.fromarray(image)
354
-
355
- inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
356
- generated_ids = florence_model.generate(
357
- input_ids=inputs["input_ids"],
358
- pixel_values=inputs["pixel_values"],
359
- max_new_tokens=1024,
360
- early_stopping=False,
361
- do_sample=False,
362
- num_beams=3,
363
- )
364
- generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
365
- parsed_answer = florence_processor.post_process_generation(
366
- generated_text,
367
- task="<MORE_DETAILED_CAPTION>",
368
- image_size=(image.width, image.height)
369
- )
370
- prompt = parsed_answer["<MORE_DETAILED_CAPTION>"]
371
- print("\n\nGeneration completed!:"+ prompt)
372
- return prompt
373
-
374
- io = gr.Interface(generate_caption,
375
- inputs=[gr.Image(label="Input Image")],
376
- outputs = [gr.Textbox(label="Output Prompt", lines=2, show_copy_button = True),
377
- # gr.Image(label="Output Image")
378
- ]
379
- )
380
 
381
  with gr.Tab("Image Upscaler"):
382
  with gr.Row():
 
293
  # Gradio interface
294
  with gr.Blocks(theme=theme, css=css) as app:
295
  gr.HTML("<center><h6>🎨 Image Studio</h6></center>")
296
+
297
+ with gr.Tab("Image to Prompt"):
298
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
299
+
300
+ # Initialize Florence model
301
+ device = "cuda" if torch.cuda.is_available() else "cpu"
302
+ florence_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
303
+ florence_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)
304
+
305
+ # api_key = os.getenv("HF_READ_TOKEN")
306
+
307
+ def generate_caption(image):
308
+ if not isinstance(image, Image.Image):
309
+ image = Image.fromarray(image)
310
+
311
+ inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
312
+ generated_ids = florence_model.generate(
313
+ input_ids=inputs["input_ids"],
314
+ pixel_values=inputs["pixel_values"],
315
+ max_new_tokens=1024,
316
+ early_stopping=False,
317
+ do_sample=False,
318
+ num_beams=3,
319
+ )
320
+ generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
321
+ parsed_answer = florence_processor.post_process_generation(
322
+ generated_text,
323
+ task="<MORE_DETAILED_CAPTION>",
324
+ image_size=(image.width, image.height)
325
+ )
326
+ prompt = parsed_answer["<MORE_DETAILED_CAPTION>"]
327
+ print("\n\nGeneration completed!:"+ prompt)
328
+ return prompt
329
+
330
+ io = gr.Interface(generate_caption,
331
+ inputs=[gr.Image(label="Input Image")],
332
+ outputs = [gr.Textbox(label="Output Prompt", lines=2, show_copy_button = True),
333
+ # gr.Image(label="Output Image")
334
+ ]
335
+ )
336
+
337
  with gr.Tab("Text to Image"):
338
  #gr.load("models/digiplay/AnalogMadness-realistic-model-v7")
339
  #gr.load("models/XLabs-AI/flux-RealismLora")
 
379
  outputs=image_output
380
  )
381
 
382
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
  with gr.Tab("Image Upscaler"):
385
  with gr.Row():