Fiqa committed (verified)
Commit 264752e · 1 Parent(s): 01a8184

Update app.py

Files changed (1): app.py (+27 −5)
app.py CHANGED
@@ -8,6 +8,22 @@ from diffusers import DiffusionPipeline
 import torch
 import spaces # Hugging Face Spaces module
 
+import requests
+
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+
+img_url = 'https://huggingface.co/spaces/noamrot/FuseCap/resolve/main/bike.jpg'
+raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
+
+text = "a picture of "
+inputs = processor(raw_image, text, return_tensors="pt").to(device)
+
+out = model.generate(**inputs, num_beams = 3)
+print(processor.decode(out[0], skip_special_tokens=True))
+
+
 
 
 
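Note: as committed, this new top-level block references `Image`, `processor`, and `model` before they are imported or defined (the BLIP processor and model are only loaded further down, at new lines 38-40), so the file would raise a NameError on startup. A minimal self-contained sketch of the same smoke test, assuming the `noamrot/FuseCap` checkpoint is the intended captioner:

# Self-contained sketch of the smoke test above (assumes FuseCap is the
# intended captioner; the imports and model loading are added here and
# are not part of the commit).
import requests
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

processor = BlipProcessor.from_pretrained("noamrot/FuseCap")
model = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap").to(device)

img_url = 'https://huggingface.co/spaces/noamrot/FuseCap/resolve/main/bike.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

# FuseCap captions are conditioned on the prefix "a picture of "
inputs = processor(raw_image, "a picture of ", return_tensors="pt").to(device)
out = model.generate(**inputs, num_beams=3)
print(processor.decode(out[0], skip_special_tokens=True))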
@@ -22,13 +38,14 @@ login(token=hf_token)
 # Load the processor and model
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
+processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
+model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")
+pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
 
 
-
-pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 pipe.to(device)
-
+model2.to(device)
 model.to(device)
 
 
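Note: this hunk loads a second BLIP checkpoint (FuseCap) as `processor1`/`model2` alongside the Salesforce captioner, then moves all three models to the selected device. A sketch of equivalent loading code with descriptive names (the `fusecap_*`/`blip_*` names and the fp16 option are illustrative choices, not from the commit):

# Equivalent loading logic with illustrative names (not from the commit).
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-large").to(device)

fusecap_processor = BlipProcessor.from_pretrained("noamrot/FuseCap")
fusecap_model = BlipForConditionalGeneration.from_pretrained(
    "noamrot/FuseCap").to(device)

# Loading SD 3.5 in float16 on GPU halves its memory footprint (a common
# choice, not something this commit does).
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-medium",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)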
@@ -50,9 +67,14 @@ def generate_caption_and_image(image):
     selected_fabric = random.choice(fabrics)
     selected_pattern = random.choice(patterns)
     selected_textile_design = random.choice(textile_designs)
+    text = "a picture of "
+    inputs = processor(img, text, return_tensors="pt").to(device)
+
+    out = model.generate(**inputs, num_beams = 3)
+
+
 
-
-    caption2 =""
+    caption2 = processor.decode(out[0], skip_special_tokens=True)
 
     # Generate caption
     inputs = processor(image, return_tensors="pt", padding=True, truncation=True, max_length=250)
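Note: inside `generate_caption_and_image`, the new lines call the Salesforce `processor`/`model` rather than the FuseCap `processor1`/`model2` loaded above, and pass an undefined `img` instead of the function's `image` argument. A sketch of what the hunk appears to intend (an assumption; the commit itself keeps `img` and `processor`/`model` here):

# Sketch of the apparent intent: caption the function's `image` argument
# with the FuseCap checkpoint (assumption; not what the commit does).
text = "a picture of "
inputs = processor1(image, text, return_tensors="pt").to(device)
out = model2.generate(**inputs, num_beams=3)
caption2 = processor1.decode(out[0], skip_special_tokens=True)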