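"""Hugging Face Space: caption an uploaded clothing image with BLIP, extract any
printed text with EasyOCR, then prompt Stable Diffusion 3.5 Medium to generate a
new clothing design from that description, served through a Gradio interface."""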
import os

import easyocr
import gradio as gr
import numpy as np
import torch
import spaces  # Hugging Face Spaces GPU decorator
from diffusers import DiffusionPipeline
from huggingface_hub import login
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Get Hugging Face Token from environment variable
hf_token = os.getenv('HF_AUTH_TOKEN')
if not hf_token:
    raise ValueError("Hugging Face token is not set in the environment variables.")
login(token=hf_token)

# Load the processor and model
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
# Initialize the Stable Diffusion 3.5 Medium pipeline
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")

# Move both models to the GPU when one is available
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe.to(device)
model.to(device)


@spaces.GPU(duration=300)
def generate_caption_and_image(image):
    img = image.convert("RGB")

    # Extract any printed text from the image with EasyOCR
    reader = easyocr.Reader(['en'])
    extracted_text = " ".join(reader.readtext(np.array(img), detail=0))

    # Generate a caption with BLIP
    inputs = processor(img, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}
    out = model.generate(**inputs, max_length=250)
    caption = processor.decode(out[0], skip_special_tokens=True)

    # Build the text-to-image prompt around the extracted text
    prompt = (
        "Create a highly realistic design of a clothing item based on the following "
        "description: 'The design should incorporate elements from the extracted text: "
        f"{extracted_text}. The clothing should look realistic, modern, and stylish. "
        "Use high-quality fabric textures and realistic lighting to give the design a "
        "lifelike appearance. The colors, patterns, and materials should reflect the "
        "essence of the caption and extracted text.'"
    )


    # Generate image based on the caption
    generated_image = pipe(prompt).images[0]

    return caption, generated_image

# Gradio UI
iface = gr.Interface(
    fn=generate_caption_and_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[gr.Textbox(label="Generated Caption"), gr.Image(label="Generated Design")],
    live=True
)
iface.launch(share=True)