Fiqa committed on
Commit
9023169
·
verified ·
1 Parent(s): e214171

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -26
app.py CHANGED
@@ -20,32 +20,30 @@ import spaces # Hugging Face Spaces module
20
 
21
  # Initialize the model
22
  pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
23
- st.title("Image Caption Generator")
24
- st.write("Upload an image or provide an image URL to generate its caption.")
25
-
26
- # Option for image upload
27
- img_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
28
- if img_file is not None:
29
- raw_image = Image.open(img_file).convert('RGB')
30
- text = "a photography of"
31
- inputs = processor(raw_image, text, return_tensors="pt", padding =True, truncation=True, max_length =250)
32
- out = model.generate(**inputs)
33
- caption = processor.decode(out[0], skip_special_tokens=True)
34
 
35
  @spaces.GPU(duration=300)
36
- def generate_image(prompt):
37
- # Move the model to GPU if available
38
- device = "cuda" if torch.cuda.is_available() else "cpu"
39
- pipe.to(device)
40
- image = pipe(prompt).images[0]
41
- return image
42
-
43
- # Create the Gradio interface
44
- iface = gr.Interface(fn=generate_image,
45
- inputs=caption,
46
- outputs=gr.Image(label="Generated Image"),
47
- title="Astronaut in a Jungle Model")
48
-
49
- # Launch the interface
50
- iface.launch(share=True)
 
 
 
 
 
 
 
51
 
 
20
 
21
  # Initialize the model
22
  pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
23
+
24
+
 
 
 
 
 
 
 
 
 
25
 
26
  @spaces.GPU(duration=300)
27
+ def generate_caption_and_image(image):
28
+ # Process the image
29
+ raw_image = image.convert("RGB")
30
+
31
+ # Generate caption
32
+ inputs = processor(raw_image, return_tensors="pt", padding=True, truncation=True, max_length=250)
33
+ inputs = {key: val.to(device) for key, val in inputs.items()}
34
+ out = model.generate(**inputs)
35
+ caption = processor.decode(out[0], skip_special_tokens=True)
36
+
37
+ # Generate image based on the caption
38
+ generated_image = pipe(caption).images[0]
39
+
40
+ return caption, generated_image
41
+
42
+ # Gradio UI
43
+ iface = gr.Interface(
44
+ fn=generate_caption_and_image,
45
+ inputs=gr.Image(type="pil", label="Upload Image"),
46
+ outputs=[gr.Textbox(label="Generated Caption"), gr.Image(label="Generated Design")],
47
+ live=True
48
+ )
49