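# Gradio Space: caption an uploaded garment photo with Llama 3.2 Vision,
# then render a new clothing design with FLUX.1-dev based on the caption
# and the user's fabric / pattern / textile-design selections.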
import os

import gradio as gr
import torch
import spaces  # Hugging Face Spaces module (provides the ZeroGPU decorator)
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import MllamaForConditionalGeneration, AutoProcessor
from diffusers import FluxPipeline

# Only needed by the commented-out alternatives kept below for reference
from transformers import BlipProcessor, BlipForConditionalGeneration
from diffusers import DiffusionPipeline
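# Option lists surfaced as radio buttons in the Gradio UI below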
fabrics = ['cotton', 'silk', 'denim', 'linen', 'polyester', 'wool', 'velvet']
patterns = ['striped', 'floral', 'geometric', 'abstract', 'solid', 'polka dots']
textile_designs = ['woven texture', 'embroidery', 'printed fabric', 'hand-dyed', 'quilting']
# Load the Hugging Face token from the environment (a local .env file is supported)
load_dotenv()
hf_token = os.getenv("HUGGING_FACE_TOKEN")
if not hf_token:
    raise ValueError("Hugging Face token is not set in the environment variables.")
# Authenticate with the Hub; never hard-code tokens in source
login(token=hf_token)
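# Llama 3.2 11B Vision produces the image caption; loaded in bfloat16 with automatic device placement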
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)
# Alternative captioning / image-generation models, kept for reference:
# processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
# model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
# processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
# model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")
# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
# FLUX.1-dev renders the final design image from the caption
pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)

device = "cuda" if torch.cuda.is_available() else "cpu"
# The Llama model is already placed by device_map="auto"; only the FLUX pipeline needs moving
pipe.to(device)
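# Request a ZeroGPU slot for up to 150 seconds per call when hosted on Spaces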
@spaces.GPU(duration=150)
def generate_caption_and_image(image, f, p, d):
    if f is not None and p is not None and d is not None and image is not None:
img = image.convert("RGB")
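        # --- Earlier OCR/BLIP captioning + Stable Diffusion approach, kept for reference ---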
# reader = easyocr.Reader(['en'])
# # result = reader.readtext(img)
# import random
# text = "a picture of "
# inputs = processor(img, text, return_tensors="pt").to(device)
# out = model2.generate(**inputs, num_beams = 3)
# caption2 = processor.decode(out[0], skip_special_tokens=True)
# Generate caption
# inputs = processor(image, return_tensors="pt", padding=True, truncation=True, max_length=250)
# inputs = {key: val.to(device) for key, val in inputs.items()}
# out = model.generate(**inputs)
# caption1 = processor.decode(out[0], skip_special_tokens=True)
# prompt = f"Design a high-quality, stylish clothing item that seamlessly blends the essence of {caption1} and {caption2}. The design should prominently feature {f}{d} and incorporate {p}. The final piece should exude sophistication and creativity, suitable for modern trends while retaining an element of timeless appeal. Ensure the textures and patterns complement each other harmoniously, creating a visually striking yet wearable garment."
# # Generate image based on the caption
# generated_image = pipe(prompt).images[0]
# generated_image1 =pipe(prompt).images[0]
# return generated_image, generated_image1
        # Caption the uploaded image with Llama 3.2 Vision
        # (replaces the leftover haiku demo prompt with an actual caption request)
        messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Describe this clothing item's style, colors, and materials in one sentence."}]}]
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(img, input_text, add_special_tokens=False, return_tensors="pt").to(device)
        output = model.generate(**inputs, max_new_tokens=30)
        # Decode only the newly generated tokens, not the echoed prompt
        caption = processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
        # Fold the user's fabric, pattern, and textile-design picks into the prompt
        # (adapted from the commented-out template above) so the selections shape the output
        prompt = f"Design a high-quality, stylish clothing item inspired by this description: {caption}. The design should prominently feature {f} with {d} and incorporate {p}."
        # Render the design with FLUX.1-dev; the fixed seed makes results reproducible
        generated_image = pipe(prompt, height=1024, width=1024, guidance_scale=3.5, num_inference_steps=50, max_sequence_length=512, generator=torch.Generator("cpu").manual_seed(0)).images[0]
        return generated_image
    return None
# Gradio UI
iface = gr.Interface(
    fn=generate_caption_and_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Radio(fabrics, label="Select Fabric"),
        gr.Radio(patterns, label="Select Pattern"),
        gr.Radio(textile_designs, label="Select Textile Design"),
    ],
    outputs=[gr.Image(label="Generated Design")],
    live=True,
)
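# Note: share=True is redundant when running on Hugging Face Spaces, where the app is already served publicly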
iface.launch(share=True)