File size: 3,173 Bytes
f667084
 
a04adbd
4d54b56
0372f7c
fa3a39e
 
 
 
a04adbd
264752e
50b4bab
 
264752e
dcf269f
 
264752e
 
01b1364
 
 
0372f7c
3891dec
a04adbd
f667084
a04adbd
f667084
 
 
 
 
50b4bab
 
a04adbd
 
 
264752e
 
43c59b0
50b4bab
dcf269f
50b4bab
3891dec
fa3a39e
f418994
dcf269f
264752e
f418994
33407d1
9023169
 
a04adbd
c83e28c
9023169
4d54b56
5ecb8ce
f469466
252a605
01b1364
 
 
50b4bab
0c3147e
 
0276b97
0ef1f3f
264752e
 
 
43c59b0
264752e
 
0372f7c
264752e
9023169
 
f66faee
9023169
 
01a8184
0372f7c
ae4c909
84bafee
ae4c909
84bafee
257a388
4878b0f
ca650cc
9023169
 
43c59b0
9023169
45289ed
9023169
 
 
 
0ef1f3f
 
 
 
 
9023169
 
 
a81e611
f667084
0ef1f3f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
from huggingface_hub import login
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

import gradio as gr
from diffusers import DiffusionPipeline
import torch
import spaces  # Hugging Face Spaces module

import requests
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info

from diffusers import DiffusionPipeline



# Option lists shown to the user as radio-button choices in the Gradio UI.
fabrics = [
    "cotton",
    "silk",
    "denim",
    "linen",
    "polyester",
    "wool",
    "velvet",
]
patterns = [
    "striped",
    "floral",
    "geometric",
    "abstract",
    "solid",
    "polka dots",
]
textile_designs = [
    "woven texture",
    "embroidery",
    "printed fabric",
    "hand-dyed",
    "quilting",
]




# Authenticate with the Hugging Face Hub. The token comes from the
# HF_AUTH_TOKEN environment variable; a missing or empty value aborts startup.
hf_token = os.environ.get("HF_AUTH_TOKEN")
if hf_token is None or hf_token == "":
    raise ValueError("Hugging Face token is not set in the environment variables.")
login(token=hf_token)



# Choose the compute device once: CUDA when torch reports it, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Captioning checkpoint #1: Salesforce BLIP (large).
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

# Captioning checkpoint #2: FuseCap.
processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")

# Text-to-image pipeline used to render the generated design.
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")

# Move both captioning models and the diffusion pipeline onto the chosen device.
model2.to(device)
model.to(device)
pipe.to(device)



@spaces.GPU(duration=150)
def generate_caption_and_image(
    image,
    selected_fabric=None,
    selected_pattern=None,
    selected_textile_design=None,
):
    """Caption an image with two models and generate a clothing design from it.

    The image is captioned once with the FuseCap model (conditioned on the
    prefix "a picture of ") and once with the BLIP-large model. Both captions
    and the chosen fabric / pattern / textile design are combined into a text
    prompt, which is passed to the diffusion pipeline.

    Args:
        image: PIL image to describe. ``None`` (e.g. a UI event fired before
            anything was uploaded) yields an empty result instead of an error.
        selected_fabric: Fabric name for the prompt. When omitted or empty, a
            random entry of ``fabrics`` is used.
        selected_pattern: Pattern name for the prompt. When omitted or empty,
            a random entry of ``patterns`` is used.
        selected_textile_design: Design style for the prompt. When omitted or
            empty, a random entry of ``textile_designs`` is used.

    Returns:
        A ``(prompt, generated_image)`` tuple, or ``("", None)`` when no image
        was supplied.
    """
    import random  # local import: only needed for the fallback choices below

    # Nothing to caption yet; skip the expensive model calls.
    if image is None:
        return "", None

    # The prompt previously referenced these names without defining them
    # (NameError). They are now parameters, with a random fallback so the
    # original single-argument call style keeps working.
    selected_fabric = selected_fabric or random.choice(fabrics)
    selected_pattern = selected_pattern or random.choice(patterns)
    selected_textile_design = selected_textile_design or random.choice(textile_designs)

    # Normalise the colour mode so both captioners see the same RGB input.
    img = image.convert("RGB")

    # Caption 2: FuseCap, using its own processor and a text prefix.
    text = "a picture of "
    inputs = processor1(img, text, return_tensors="pt").to(device)
    out = model2.generate(**inputs, num_beams=3)
    caption2 = processor1.decode(out[0], skip_special_tokens=True)

    # Caption 1: BLIP-large, unconditional captioning of the same RGB image.
    inputs = processor(img, return_tensors="pt", padding=True, truncation=True, max_length=250)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    out = model.generate(**inputs)
    caption1 = processor.decode(out[0], skip_special_tokens=True)

    prompt = (
        f"Generate a clothing item using the following details: 1. {caption1} 2. {caption2} 3. Fabric: {selected_fabric} 4. Pattern: {selected_pattern} 5. Design Style: {selected_textile_design}. "
        "The image should have a clean, minimalistic grey or white background, with realistic lighting and fine details, ensuring a sophisticated and polished appearance"
    )

    # Render the design from the assembled prompt.
    generated_image = pipe(prompt).images[0]

    return prompt, generated_image

# Gradio UI: an image upload plus three radio selectors as inputs; a textbox
# and an image as outputs. `live=True` re-runs the function on input changes.
iface = gr.Interface(
    fn=generate_caption_and_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Radio(fabrics, label="Select Fabric"),
        gr.Radio(patterns, label="Select Pattern"),
        gr.Radio(textile_designs, label="Select Textile Design"),
    ],  # the comma after this list was missing, which made the file a SyntaxError
    outputs=[gr.Textbox(label="Generated Caption"), gr.Image(label="Generated Design")],
    live=True,
)
iface.launch(share=True)