"""Spaces app: caption an uploaded image with BLIP, then render a clothing
design from that caption with Stable Diffusion 3.5."""

import os

import easyocr  # kept for parity with the original file; OCR is currently disabled
import gradio as gr
import spaces  # Hugging Face Spaces GPU-allocation decorator
import torch
from diffusers import DiffusionPipeline
from huggingface_hub import login
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

# Authenticate with the Hugging Face Hub using a token from the environment.
hf_token = os.getenv('HF_AUTH_TOKEN')
if not hf_token:
    raise ValueError("Hugging Face token is not set in the environment variables.")
login(token=hf_token)

# BLIP captioning model: turns an uploaded image into a text description.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

# Stable Diffusion 3.5 pipeline: turns the caption-derived prompt into an image.
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")

# Run on GPU when available; move both models to the same device.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe.to(device)
model.to(device)
@spaces.GPU(duration=300)
def generate_caption_and_image(image):
    """Caption the uploaded image with BLIP, then generate a clothing design.

    Parameters
    ----------
    image : PIL.Image.Image
        Image uploaded through the Gradio UI.

    Returns
    -------
    tuple[str, PIL.Image.Image]
        The BLIP-generated caption and the Stable Diffusion-generated design.
    """
    import random

    # Normalize to 3-channel RGB: Gradio uploads may be RGBA, grayscale, or
    # palette-based, which BLIP preprocessing does not expect.
    img = image.convert("RGB")

    # OCR was prototyped but is currently disabled.
    # reader = easyocr.Reader(['en'])
    # result = reader.readtext(img)

    # Randomly pick one attribute from each category to diversify the prompt.
    fabrics = ['cotton', 'silk', 'denim', 'linen', 'polyester', 'wool', 'velvet']
    patterns = ['striped', 'floral', 'geometric', 'abstract', 'solid', 'polka dots']
    textile_designs = ['woven texture', 'embroidery', 'printed fabric', 'hand-dyed', 'quilting']
    selected_fabric = random.choice(fabrics)
    selected_pattern = random.choice(patterns)
    selected_textile_design = random.choice(textile_designs)

    # Generate the caption. Bug fix: pass the RGB-converted `img` — the
    # original passed the raw `image`, leaving `img` dead and letting
    # non-RGB uploads reach the processor.
    inputs = processor(img, return_tensors="pt", padding=True, truncation=True, max_length=250)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    prompt = f'''Create a highly realistic clothing item based on the following description: The design should reflect {caption}, featuring a highly realistic and modern piece of clothing that incorporates stylish and high-quality textures, exuding sophistication with realistic fabric lighting and fine details, subtly hinting at {selected_fabric}, with a {selected_pattern} motif and a {selected_textile_design} style.'''

    # Generate the design image from the caption-derived prompt.
    generated_image = pipe(prompt).images[0]
    return caption, generated_image
# Gradio UI: upload an image, get back a caption and a generated design.
iface = gr.Interface(
    fn=generate_caption_and_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[gr.Textbox(label="Generated Caption"), gr.Image(label="Generated Design")],
    # Bug fix: live=True re-runs the 300-second GPU pipeline on every input
    # event (each upload/change), exhausting GPU quota; require an explicit
    # submit instead.
    live=False,
)

iface.launch(share=True)