Fiqa committed
Commit 43c59b0 (verified) · 1 parent: 4db6b20

Update app.py

Files changed (1):
  app.py +4 -32
app.py CHANGED
@@ -35,14 +35,8 @@ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
 processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
 model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")
-# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
-# model3 =model = Qwen2VLForConditionalGeneration.from_pretrained(
-# "prithivMLmods/Qwen2-VL-OCR-2B-Instruct", torch_dtype="auto", device_map="auto"
-# )
-# processor2 = AutoProcessor.from_pretrained("prithivMLmods/Qwen2-VL-OCR-2B-Instruct")
+pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
 
-pipe3 = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev")
-pipe3.load_lora_weights("tryonlabs/FLUX.1-dev-LoRA-Outfit-Generator")
 
 
 
@@ -51,7 +45,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 # pipe.to(device)
 model2.to(device)
 model.to(device)
-pip3.to(device)
+pip.to(device)
 
 
 
@@ -61,28 +55,6 @@ def generate_caption_and_image(image):
 # reader = easyocr.Reader(['en'])
 # # result = reader.readtext(img)
 import random
-# messages = [{"role": "user", "content": [{"type": "image", "image": img}, {"type": "text", "text": "Describe this Image"}]}]
-
-# text = processor.apply_chat_template(
-# messages, tokenize=False, add_generation_prompt=True
-# )
-# image_inputs, video_inputs = process_vision_info(messages)
-# inputs = processor(
-# text=[text],
-# images=image_inputs,
-# videos=video_inputs,
-# padding=True,
-# return_tensors="pt",
-# )
-# inputs = inputs.to(device)
-# generated_ids = model.generate(**inputs, max_new_tokens=128)
-# generated_ids_trimmed = [
-# out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-# ]
-# result = processor.batch_decode(
-# generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-# )
-
 
 
 
@@ -99,7 +71,7 @@ def generate_caption_and_image(image):
 text = "a picture of "
 inputs = processor(img, text, return_tensors="pt").to(device)
 
-out = model.generate(**inputs, num_beams = 3)
+out = model2.generate(**inputs, num_beams = 3)
 
 
 
@@ -116,7 +88,7 @@ def generate_caption_and_image(image):
 
 
 # Generate image based on the caption
-generated_image = pipe3(prompt).images[0]
+generated_image = pipe(prompt).images[0]
 
 return prompt, generated_image
 
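For context, a minimal sketch of the model-loading block as it stands after this commit, not a verbatim copy of app.py. It assumes the committed `pip.to(device)` is a typo for `pipe.to(device)` (the pipeline variable defined in the added line is `pipe`), and the half-precision load for the Stable Diffusion 3.5 pipeline is an optional assumption to keep GPU memory in check, not part of the commit. Access to stabilityai/stable-diffusion-3.5-medium may also require accepting the model license on the Hub.

# Sketch of the loading block after this commit (not verbatim app.py).
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# BLIP captioners kept unchanged by the commit.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")

# Text-to-image pipeline swapped in by the commit (replaces FLUX.1-dev + outfit LoRA).
# torch_dtype here is an assumption for memory headroom; the commit loads the default dtype.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-medium",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)

model.to(device)
model2.to(device)
pipe.to(device)  # the commit writes `pip.to(device)`, read here as a typo for `pipe`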
 
 
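And a sketch of the caption-and-generate path after the commit, written as a function that takes the objects from the sketch above as arguments. The committed hunk pairs the BLIP `processor` with the FuseCap `model2`, and that pairing is kept here. The decode step and any extra prompt shaping are not visible in this diff, so the plain `processor.decode` below is a placeholder, and the `num_inference_steps` / `guidance_scale` values are illustrative defaults rather than values from the commit.

from PIL import Image

def generate_caption_and_image(image, processor, model2, pipe, device):
    """Caption an image with BLIP (FuseCap weights), then render the caption with SD 3.5 Medium."""
    img = image.convert("RGB")

    # Beam-search caption, mirroring the committed hunk (processor + model2, num_beams=3).
    text = "a picture of "
    inputs = processor(img, text, return_tensors="pt").to(device)
    out = model2.generate(**inputs, num_beams=3)
    prompt = processor.decode(out[0], skip_special_tokens=True)

    # Generate image based on the caption (step/guidance values are illustrative).
    generated_image = pipe(prompt, num_inference_steps=28, guidance_scale=4.5).images[0]
    return prompt, generated_image

# Example use with the objects loaded above:
# caption, img_out = generate_caption_and_image(Image.open("input.jpg"), processor, model2, pipe, device)

Net effect of the commit: the FLUX.1-dev pipeline with the tryonlabs outfit-generator LoRA and the commented-out Qwen2-VL OCR path are dropped in favour of a single Stable Diffusion 3.5 Medium pipeline, so the app no longer downloads the FLUX weights or calls `load_lora_weights` at startup.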