Fiqa committed on
Commit
f469466
·
verified ·
1 Parent(s): e924119

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -28
app.py CHANGED
@@ -34,10 +34,10 @@ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-capt
34
  processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
35
  model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")
36
  pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
37
- model3 =model = Qwen2VLForConditionalGeneration.from_pretrained(
38
- "prithivMLmods/Qwen2-VL-OCR-2B-Instruct", torch_dtype="auto", device_map="auto"
39
- )
40
- processor2 = AutoProcessor.from_pretrained("prithivMLmods/Qwen2-VL-OCR-2B-Instruct")
41
 
42
 
43
 
@@ -53,29 +53,29 @@ model.to(device)
53
  def generate_caption_and_image(image):
54
  img = image.convert("RGB")
55
  # reader = easyocr.Reader(['en'])
56
- # result = reader.readtext(img)
57
- import random
58
- messages = [{"role": "user", "content": [{"type": "image", "image": img}, {"type": "text", "text": "Describe this Image"}]}]
59
-
60
- text = processor.apply_chat_template(
61
- messages, tokenize=False, add_generation_prompt=True
62
- )
63
- image_inputs, video_inputs = process_vision_info(messages)
64
- inputs = processor(
65
- text=[text],
66
- images=image_inputs,
67
- videos=video_inputs,
68
- padding=True,
69
- return_tensors="pt",
70
- )
71
- inputs = inputs.to(device)
72
- generated_ids = model.generate(**inputs, max_new_tokens=128)
73
- generated_ids_trimmed = [
74
- out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
75
- ]
76
- result = processor.batch_decode(
77
- generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
78
- )
79
 
80
 
81
 
@@ -105,7 +105,7 @@ def generate_caption_and_image(image):
105
  out = model.generate(**inputs)
106
  caption1 = processor.decode(out[0], skip_special_tokens=True)
107
 
108
- prompt = f'''Create a highly realistic clothing item based on the following descriptions: The design should reflect {caption1} and {caption2}, blending both themes into a single, stylish, and modern piece of clothing. Incorporate highly realistic and high-quality textures that exude sophistication, with realistic fabric lighting and fine details. Subtly hint at {selected_fabric}, featuring a {selected_pattern} motif and a {selected_textile_design} style that harmoniously balances the essence of both captions.and {result} should be written on top of it'''
109
 
110
 
111
 
 
34
  processor1 = BlipProcessor.from_pretrained("noamrot/FuseCap")
35
  model2 = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")
36
  pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
37
+ # model3 =model = Qwen2VLForConditionalGeneration.from_pretrained(
38
+ # "prithivMLmods/Qwen2-VL-OCR-2B-Instruct", torch_dtype="auto", device_map="auto"
39
+ # )
40
+ # processor2 = AutoProcessor.from_pretrained("prithivMLmods/Qwen2-VL-OCR-2B-Instruct")
41
 
42
 
43
 
 
53
  def generate_caption_and_image(image):
54
  img = image.convert("RGB")
55
  # reader = easyocr.Reader(['en'])
56
+ # # result = reader.readtext(img)
57
+ # import random
58
+ # messages = [{"role": "user", "content": [{"type": "image", "image": img}, {"type": "text", "text": "Describe this Image"}]}]
59
+
60
+ # text = processor.apply_chat_template(
61
+ # messages, tokenize=False, add_generation_prompt=True
62
+ # )
63
+ # image_inputs, video_inputs = process_vision_info(messages)
64
+ # inputs = processor(
65
+ # text=[text],
66
+ # images=image_inputs,
67
+ # videos=video_inputs,
68
+ # padding=True,
69
+ # return_tensors="pt",
70
+ # )
71
+ # inputs = inputs.to(device)
72
+ # generated_ids = model.generate(**inputs, max_new_tokens=128)
73
+ # generated_ids_trimmed = [
74
+ # out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
75
+ # ]
76
+ # result = processor.batch_decode(
77
+ # generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
78
+ # )
79
 
80
 
81
 
 
105
  out = model.generate(**inputs)
106
  caption1 = processor.decode(out[0], skip_special_tokens=True)
107
 
108
+ prompt = f'''Create a highly realistic clothing item based on the following descriptions: The design should reflect {caption1} and {caption2}, blending both themes into a single, stylish, and modern piece of clothing. Incorporate highly realistic and high-quality textures that exude sophistication, with realistic fabric lighting and fine details. Subtly hint at {selected_fabric}, featuring a {selected_pattern} motif and a {selected_textile_design} style that harmoniously balances the essence of both captions.'''
109
 
110
 
111