Fiqa committed · Commit 3e3453c · verified · 1 Parent(s): 3a94231

Update app.py

Files changed (1): app.py (+79, -80)
app.py CHANGED
@@ -1,113 +1,112 @@
 import os
-from huggingface_hub import login
+from huggingface_hub import login, snapshot_download
 from transformers import BlipProcessor, BlipForConditionalGeneration
-from transformers import MllamaForConditionalGeneration, AutoProcessor
 from PIL import Image
 from dotenv import load_dotenv
-
 import gradio as gr
-from diffusers import DiffusionPipeline
+from diffusers import FluxPipeline
 import torch
 import spaces # Hugging Face Spaces module
 
-import requests
-from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
-from qwen_vl_utils import process_vision_info
-
-from diffusers import DiffusionPipeline
-
+# -----------------------
+# Pre-cache models at startup
+# -----------------------
+snapshot_download("Salesforce/blip-image-captioning-large", timeout=120)
+snapshot_download("noamrot/FuseCap", timeout=120)
+snapshot_download("black-forest-labs/FLUX.1-dev", timeout=300)
+
+# -----------------------
+# Authentication
+# -----------------------
+load_dotenv()
+HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
+if HUGGINGFACE_TOKEN:
+    login(token=HUGGINGFACE_TOKEN)
+
+# -----------------------
+# Load models
+# -----------------------
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large", timeout=120)
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large", timeout=120).to(device)
+
+processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap", timeout=120)
+model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap", timeout=120).to(device)
+
+pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, timeout=300).to(device)
+
+# -----------------------
+# Options
+# -----------------------
 fabrics = ['cotton', 'silk', 'denim', 'linen', 'polyester', 'wool', 'velvet']
 patterns = ['striped', 'floral', 'geometric', 'abstract', 'solid', 'polka dots']
 textile_designs = ['woven texture', 'embroidery', 'printed fabric', 'hand-dyed', 'quilting']
 
-# Get Hugging Face Token from environment variable
-HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
-
-# Authenticate using the token
-login(token =HUGGINGFACE_TOKEN)
-
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
-processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
-model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")
-
-from diffusers import FluxPipeline
-
-pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# pipe.to(device)
-
-model.to(device)
-pipe.to(device)
-model2.to(device)
-
+# -----------------------
+# Inference Function
+# -----------------------
 @spaces.GPU(duration=150)
 def generate_caption_and_image(image, f, p, d):
-    if f!=None and p!=None and d!=None and image!=None:
+    if image and f and p and d:
         img = image.convert("RGB")
-        # reader = easyocr.Reader(['en'])
-        # # result = reader.readtext(img)
-        # import random
-
-        text = "a picture of "
-        inputs = processor(img, text, return_tensors="pt").to(device)
-        out = model2.generate(**inputs, num_beams = 3)
+
+        # Caption with FuseCap
+        inputs = processor(img, "a picture of ", return_tensors="pt").to(device)
+        out = model2.generate(**inputs, num_beams=3)
         caption2 = processor1.decode(out[0], skip_special_tokens=True)
-
+
+        # Caption with BLIP
         inputs = processor(image, return_tensors="pt", padding=True, truncation=True, max_length=250)
-        inputs = {key: val.to(device) for key, val in inputs.items()}
+        inputs = {k: v.to(device) for k, v in inputs.items()}
         out = model.generate(**inputs)
         caption1 = processor.decode(out[0], skip_special_tokens=True)
-        prompt = f"Design a high-quality, stylish clothing item that flawlessly combines the essence of {caption1} and {caption2}. The design should emphasize the luxurious feel and practicality of {f} fabric, while integrating intricate {d} textual design elements. Incorporate {p} patterns that elevate the garment's aesthetic, ensuring a harmonious blend of textures and visuals. The final piece should be both sophisticated and innovative, reflecting modern trends while preserving timeless elegance. The design should be bold, wearable, and a true work of art."
-
-        image = pipe(prompt,height=1024,width=1024,guidance_scale=3.5,num_inference_steps=50,max_sequence_length=512,generator=torch.Generator("cpu").manual_seed(0)).images[0]
-        return image
+
+        # Compose prompt
+        prompt = (
+            f"Design a high-quality, stylish clothing item that combines the essence of {caption1} and {caption2}. "
+            f"Use luxurious {f} fabric with intricate {d} design elements. "
+            f"Incorporate {p} patterns to elevate the garment's aesthetic. "
+            "Ensure sophistication, innovation, and timeless elegance."
+        )
+
+        # Generate image
+        result = pipe(
+            prompt,
+            height=1024,
+            width=1024,
+            guidance_scale=3.5,
+            num_inference_steps=50,
+            max_sequence_length=512,
+            generator=torch.Generator('cpu').manual_seed(0)
+        ).images[0]
+
+        return result
     return None
 
+# -----------------------
 # Gradio UI
+# -----------------------
 iface = gr.Interface(
     fn=generate_caption_and_image,
-    inputs=[gr.Image(type="pil", label="Upload Image"), gr.Radio(fabrics, label="Select Fabric"), gr.Radio(patterns, label="Select Pattern"), gr.Radio(textile_designs, label="Select Textile Design")],
-    outputs=[gr.Image(label="Generated Design 1")],
+    inputs=[
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Radio(fabrics, label="Select Fabric"),
+        gr.Radio(patterns, label="Select Pattern"),
+        gr.Radio(textile_designs, label="Select Textile Design")
+    ],
+    outputs=gr.Image(label="Generated Design"),
     live=True
 )
-iface.launch(share=True)
+
+iface.launch()
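
The committed startup flow downloads each model repo into the local Hugging Face cache with snapshot_download before any from_pretrained call, so the later loads resolve from that cache instead of fetching at load time. A minimal standalone sketch of that pattern for one of the three repos (the local_files_only=True flag is an illustrative assumption, not part of this commit):

    from huggingface_hub import snapshot_download
    from transformers import BlipProcessor, BlipForConditionalGeneration

    repo = "Salesforce/blip-image-captioning-large"

    # Fetch the full snapshot once; subsequent loads hit the local cache.
    snapshot_download(repo)

    # local_files_only=True is assumed here for illustration: it makes the
    # load fail fast instead of re-downloading if the snapshot is incomplete.
    processor = BlipProcessor.from_pretrained(repo, local_files_only=True)
    model = BlipForConditionalGeneration.from_pretrained(repo, local_files_only=True)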