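"""Hugging Face Space: caption an uploaded reference image with BLIP-large and FuseCap,
then prompt Stable Diffusion 3.5 Medium to render two clothing-design variations."""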
import os

import gradio as gr
import spaces  # Hugging Face Spaces GPU decorator
import torch
from diffusers import DiffusionPipeline
from huggingface_hub import login
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
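# Choices offered as radio buttons in the Gradio UI below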
fabrics = ['cotton', 'silk', 'denim', 'linen', 'polyester', 'wool', 'velvet']
patterns = ['striped', 'floral', 'geometric', 'abstract', 'solid', 'polka dots']
textile_designs = ['woven texture', 'embroidery', 'printed fabric', 'hand-dyed', 'quilting']
# Get Hugging Face Token from environment variable
hf_token = os.getenv('HF_AUTH_TOKEN')
if not hf_token:
    raise ValueError("Hugging Face token is not set in the environment variables.")
login(token=hf_token)
# Load the two captioning models and the text-to-image pipeline
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")

pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model2.to(device)
pipe.to(device)
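# @spaces.GPU marks the function for GPU execution on Hugging Face Spaces (ZeroGPU);
# duration=150 is understood here as the requested per-call time budget in seconds.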
@spaces.GPU(duration=150)
def generate_caption_and_image(image, f, p, d):
    # Require an image plus all three design choices before generating
    if image is None or f is None or p is None or d is None:
        return None, None

    img = image.convert("RGB")

    # Secondary caption: FuseCap, conditioned on a short text prefix
    text = "a picture of "
    inputs = processor1(img, text, return_tensors="pt").to(device)
    out = model2.generate(**inputs, num_beams=3)
    caption2 = processor1.decode(out[0], skip_special_tokens=True)

    # Primary caption: BLIP-large, unconditional
    inputs = processor(img, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}
    out = model.generate(**inputs)
    caption1 = processor.decode(out[0], skip_special_tokens=True)

    prompt = (
        f"Create a visually stunning clothing item inspired by: "
        f"1. Primary Context: {caption1}, describing the mood and thematic elements of the image. "
        f"2. Secondary Insights: {caption2}, providing complementary attributes and textures. "
        f"3. Fabric: '{f}', highlighting its qualities. "
        f"4. Pattern: '{p}', enhancing visual harmony. "
        f"5. Design Style: '{d}', for a refined finish. "
        f"Use a clean grey/white background with realistic lighting and intricate details "
        f"for a polished presentation."
    )

    # Run the diffusion pipeline twice to get two variations of the design
    generated_image = pipe(prompt).images[0]
    generated_image1 = pipe(prompt).images[0]
    return generated_image, generated_image1
# Gradio UI
iface = gr.Interface(
    fn=generate_caption_and_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Radio(fabrics, label="Select Fabric"),
        gr.Radio(patterns, label="Select Pattern"),
        gr.Radio(textile_designs, label="Select Textile Design"),
    ],
    outputs=[
        gr.Image(label="Generated Design 1"),
        gr.Image(label="Generated Design 2"),
    ],
    live=True,  # re-runs generation whenever an input changes
)

iface.launch(share=True)