Fiqa committed on
Commit
a04adbd
·
verified ·
1 Parent(s): ea2971c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -44
app.py CHANGED
@@ -1,54 +1,51 @@
1
  import os
2
- import requests
3
- from PIL import Image
4
- import torch
5
- import gradio as gr
6
  from huggingface_hub import login
7
- from transformers import AutoProcessor, AutoModelForCausalLM
8
- from diffusers import DiffusionPipeline
 
9
 
10
- # Hugging Face token setup
11
  hf_token = os.getenv('HF_AUTH_TOKEN')
12
  if not hf_token:
13
  raise ValueError("Hugging Face token is not set in the environment variables.")
14
  login(token=hf_token)
15
 
16
- # Initialize Stable Diffusion pipeline
17
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
 
 
 
 
 
18
 
19
- # Initialize captioning model and processor
20
- caption_model_name = "pretrained-caption-model" # Replace with the actual model name
21
- processor = AutoProcessor.from_pretrained(caption_model_name)
22
- model = AutoModelForCausalLM.from_pretrained(caption_model_name)
23
-
24
- # Check for GPU availability (handled automatically by Hugging Face Spaces)
25
- device = "cuda" if torch.cuda.is_available() else "cpu"
26
- pipe.to(device)
27
- model.to(device)
28
-
29
- # Function to process the image and generate caption and design
30
- @spaces.GPU
31
- def generate_caption_and_design(image):
32
- # Generate caption
33
- inputs = processor(image, return_tensors="pt", padding=True, truncation=True, max_length=250)
34
- inputs = {key: val.to(device) for key, val in inputs.items()}
35
- out = model.generate(**inputs)
36
- caption = processor.decode(out[0], skip_special_tokens=True)
37
-
38
- # Generate design based on caption
39
- generated_image = pipe(caption).images[0]
40
-
41
- return caption, generated_image
42
-
43
- # Gradio Interface
44
- interface = gr.Interface(
45
- fn=generate_caption_and_design,
46
- inputs=gr.Image(type="pil", label="Upload an Image"),
47
- outputs=[gr.Textbox(label="Generated Caption"), gr.Image(label="Generated Design")],
48
- title="Image Caption and Design Generator",
49
- description="Upload an image or provide an image URL to generate a caption and use it to create a similar design.",
50
- )
51
-
52
- # Launch Gradio app
53
- interface.launch()
54
 
 
1
  import os
 
 
 
 
2
  from huggingface_hub import login
3
+ from transformers import BlipProcessor, BlipForConditionalGeneration
4
+
5
+
6
 
7
+ # Get Hugging Face Token from environment variable
8
  hf_token = os.getenv('HF_AUTH_TOKEN')
9
  if not hf_token:
10
  raise ValueError("Hugging Face token is not set in the environment variables.")
11
  login(token=hf_token)
12
 
13
+ # Load the processor and model
14
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
15
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
16
+ import gradio as gr
17
+ from diffusers import DiffusionPipeline
18
+ import torch
19
+ import spaces # Hugging Face Spaces module
20
 
21
+ # Initialize the model
22
+ pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
23
+ st.title("Image Caption Generator")
24
+ st.write("Upload an image or provide an image URL to generate its caption.")
25
+
26
+ # Option for image upload
27
+ img_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
28
+ if img_file is not None:
29
+ raw_image = Image.open(img_file).convert('RGB')
30
+ text = "a photography of"
31
+ inputs = processor(raw_image, text, return_tensors="pt", padding =True, truncation=True, max_length =250)
32
+ out = model.generate(**inputs)
33
+ caption = processor.decode(out[0], skip_special_tokens=True)
34
+
35
+ @spaces.GPU(duration=300)
36
+ def generate_image(prompt):
37
+ # Move the model to GPU if available
38
+ device = "cuda" if torch.cuda.is_available() else "cpu"
39
+ pipe.to(device)
40
+ image = pipe(prompt).images[0]
41
+ return image
42
+
43
+ # Create the Gradio interface
44
+ iface = gr.Interface(fn=generate_image,
45
+ inputs=caption,
46
+ outputs=gr.Image(label="Generated Image"),
47
+ title="Astronaut in a Jungle Model")
48
+
49
+ # Launch the interface
50
+ iface.launch(share=True)
 
 
 
 
 
51