Fiqa committed (verified)
Commit 0372f7c · Parent(s): fa3a39e

Update app.py

Files changed (1):
  1. app.py +15 -9
app.py CHANGED
@@ -2,13 +2,15 @@ import os
 from huggingface_hub import login
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
-import easyocr
+
 import gradio as gr
 from diffusers import DiffusionPipeline
 import torch
 import spaces  # Hugging Face Spaces module
 
-from transformers import pipeline
+from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+
+
 
 
 
@@ -22,15 +24,15 @@ login(token=hf_token)
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
 
-
-pipe2= pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+model2 = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 
 
-# Initialize the model
 pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 pipe.to(device)
+model2.to(device)
 model.to(device)
 
 
@@ -41,6 +43,7 @@ def generate_caption_and_image(image):
     # reader = easyocr.Reader(['en'])
     # result = reader.readtext(img)
     import random
+
 
     # Define lists for the three variables
     fabrics = ['cotton', 'silk', 'denim', 'linen', 'polyester', 'wool', 'velvet']
@@ -54,15 +57,18 @@ def generate_caption_and_image(image):
 
 
 
-
+    pixel_values = feature_extractor(images=[img], return_tensors="pt").pixel_values.to(device)
+    output_ids = model.generate(pixel_values, **gen_kwargs)
+    caption = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
+
 
 
     # Generate caption
     inputs = processor(image, return_tensors="pt", padding=True, truncation=True, max_length=250)
     inputs = {key: val.to(device) for key, val in inputs.items()}
     out = model.generate(**inputs)
-    caption = processor.decode(out[0], skip_special_tokens=True)
-    caption2 =pipe2(img)
+    caption2 = processor.decode(out[0], skip_special_tokens=True)
+
     prompt = f'''Create a highly realistic clothing item based on the following descriptions: The design should reflect {caption1} and {caption2}, blending both themes into a single, stylish, and modern piece of clothing. Incorporate highly realistic and high-quality textures that exude sophistication, with realistic fabric lighting and fine details. Subtly hint at {selected_fabric}, featuring a {selected_pattern} motif and a {selected_textile_design} style that harmoniously balances the essence of both captions.'''
 
 
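Review note: in the last hunk, the new captioning block calls `model.generate(pixel_values, **gen_kwargs)`, but `model` is the BLIP captioner loaded earlier; the newly loaded vit-gpt2 weights live in `model2`. The block also reads `img` and `gen_kwargs`, neither of which is defined anywhere in this diff (the function parameter is `image`). A minimal sketch of what the hunk presumably intends, with `gen_kwargs` values assumed from the nlpconnect/vit-gpt2-image-captioning model card rather than taken from app.py:

```python
import torch
from PIL import Image
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

# Same checkpoints the commit loads; names mirror app.py.
model2 = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning").to(device)
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

# gen_kwargs is undefined in the diff; these values are an assumption
# borrowed from the model card's example.
gen_kwargs = {"max_length": 16, "num_beams": 4}

def caption_with_vit_gpt2(image: Image.Image) -> str:
    """Caption a PIL image with the vit-gpt2 model."""
    # Preprocess the PIL image into pixel values on the active device.
    pixel_values = feature_extractor(images=[image], return_tensors="pt").pixel_values.to(device)
    # Generate with model2 (the vit-gpt2 captioner), not the BLIP `model`.
    output_ids = model2.generate(pixel_values, **gen_kwargs)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
```

As committed, the added lines would hand ViT pixel values to BLIP's `generate` and fail before the prompt is ever built.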
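Likewise, the prompt f-string interpolates `caption1`, but after this change the function only ever assigns `caption` and `caption2`, so building the prompt would raise a NameError at runtime. A hedged one-line fix, assuming the vit-gpt2 caption was meant for the first slot:

```python
caption1 = caption  # assumption: the vit-gpt2 caption fills the prompt's {caption1} slot
```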
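For context, a short usage sketch of how the assembled prompt would typically drive the Stable Diffusion 3.5 pipeline loaded above; the sampler settings here are illustrative assumptions, not values from this commit:

```python
# Generate the garment image from the assembled prompt (illustrative settings).
result = pipe(prompt, num_inference_steps=28, guidance_scale=7.0)
generated_image = result.images[0]  # diffusers pipelines return a list of PIL images
generated_image.save("generated_clothing.png")
```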