import tempfile
from io import BytesIO

import gradio as gr
import requests
from gradio_client import Client, file
from PIL import Image

# Initialize the Hugging Face API clients
captioning_client = Client("fancyfeast/joy-caption-pre-alpha")
generation_client = Client("black-forest-labs/FLUX.1-dev")


def _to_file_path(image):
    """Return a local file path for *image*, saving PIL images to a temp PNG.

    Anything that is not a PIL image (e.g. a path or URL string) is returned
    unchanged.
    """
    if isinstance(image, Image.Image):
        # delete=False: the file must outlive this function so the client can
        # upload it afterwards.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            image.save(tmp, format="PNG")
        return tmp.name
    return image


def _format_captions(captions):
    """Join captions into one numbered, human-readable text block."""
    return "\n\n".join(
        f"{index}. {caption}" for index, caption in enumerate(captions, start=1)
    )


# Function to caption an image
def caption_image(image):
    """Ask the captioning Space to describe *image*.

    Args:
        image: A local file path / URL, or a PIL image.

    Returns:
        Whatever the "/stream_chat" endpoint returns (expected to be the
        caption text).
    """
    caption = captioning_client.predict(
        # gradio_client needs file inputs wrapped with file(); a bare PIL
        # object or plain string is not uploaded as an image.
        input_image=file(_to_file_path(image)),
        api_name="/stream_chat",
    )
    return caption


# Function to generate an image from a text prompt using Hugging Face API
def generate_image_from_caption(caption):
    """Generate an image from *caption* with the image-generation Space.

    Args:
        caption: Text prompt sent to the "/infer" endpoint.

    Returns:
        The generated image as returned by the client (expected to be a local
        file path).
    """
    result = generation_client.predict(
        prompt=caption,
        seed=0,
        randomize_seed=True,
        width=1024,
        height=1024,
        guidance_scale=3.5,
        num_inference_steps=28,
        api_name="/infer",
    )
    # NOTE(review): "/infer" is expected to return (image, seed); keep only
    # the image so it can be captioned again and shown in the gallery.
    if isinstance(result, (tuple, list)):
        return result[0]
    return result


# Main function to handle the upload and generate images and captions in a loop
def process_image(image, iterations):
    """Alternate captioning and image generation, starting from *image*.

    Each round captions the current image, generates a new image from that
    caption, and uses the new image as the input of the next round.

    Args:
        image: The starting image (file path or PIL image).
        iterations: Number of caption/generate rounds. Numeric values are
            truncated to an integer, since the UI number field yields floats.

    Returns:
        A tuple ``(generated_images, captions)`` of two lists with one entry
        per round.

    Raises:
        gr.Error: If no image was provided or *iterations* is not a number.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    try:
        rounds = int(iterations)
    except (TypeError, ValueError, OverflowError):
        raise gr.Error("Number of iterations must be a whole number.") from None

    generated_images = []
    captions = []
    current_image = image

    for _ in range(rounds):
        # Caption the current image
        caption = caption_image(current_image)
        captions.append(caption)

        # Generate a new image based on the caption
        new_image = generate_image_from_caption(caption)
        generated_images.append(new_image)

        # Set the newly generated image as the current image for the next iteration
        current_image = new_image

    return generated_images, captions


def _on_generate(image, iterations):
    """Button callback: run the loop and format the captions for the textbox."""
    generated_images, captions = process_image(image, iterations)
    return generated_images, _format_captions(captions)


# Gradio Interface
with gr.Blocks() as demo:
    with gr.Row():
        # "filepath" hands the upload over as a path the API client can send.
        image_input = gr.Image(type="filepath", label="Upload an Image")
        iterations_input = gr.Number(
            value=3, precision=0, label="Number of Iterations"
        )

    with gr.Row():
        output_images = gr.Gallery(label="Generated Images")
        output_captions = gr.Textbox(label="Generated Captions", lines=6)

    generate_button = gr.Button("Generate")

    generate_button.click(
        fn=_on_generate,
        inputs=[image_input, iterations_input],
        outputs=[output_images, output_captions],
    )

# Launch the app
demo.launch()