jihadzakki committed
Commit 230a39b · verified · 1 Parent(s): 7678417

Update app_dialogue.py

Files changed (1): app_dialogue.py (+46 -134)
app_dialogue.py CHANGED
@@ -2,15 +2,15 @@ import os
 import subprocess
 
 # Install flash attention
-subprocess.run(
-    "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
+# subprocess.run(
+#     "pip install flash-attn --no-build-isolation",
+#     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+#     shell=True,
+# )
 
 
 import copy
-import spaces
+# import spaces
 import time
 import torch
 
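Note: with the startup flash-attn install commented out here, and the _attn_implementation="flash_attention_2" argument dropped in the next hunk, the model falls back to transformers' default attention backend. A minimal sketch of keeping the fast path optional instead of removing it outright (hypothetical, not part of this commit):

    def pick_attn_implementation():
        # Use FlashAttention 2 only when the wheel is actually importable;
        # otherwise fall back to the default eager implementation.
        try:
            import flash_attn  # noqa: F401
            return "flash_attention_2"
        except ImportError:
            return "eager"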
@@ -21,19 +21,47 @@ from PIL import Image
 import io
 import datasets
 
+# import loralib
+# import bitsandbytes
+
 import gradio as gr
 from transformers import AutoProcessor, TextIteratorStreamer
 from transformers import Idefics2ForConditionalGeneration
 
+import torch
+from peft import LoraConfig
+from transformers import AutoProcessor, BitsAndBytesConfig, IdeficsForVisionText2Text
+
 
 DEVICE = torch.device("cuda")
-MODELS = {
-    "idefics2-8b-chatty": Idefics2ForConditionalGeneration.from_pretrained(
-        "HuggingFaceM4/idefics2-8b-chatty",
-        torch_dtype=torch.bfloat16,
-        _attn_implementation="flash_attention_2",
-    ).to(DEVICE),
-}
+
+USE_LORA = False
+USE_QLORA = True
+
+if USE_QLORA or USE_LORA:
+    lora_config = LoraConfig(
+        r=8,
+        lora_alpha=8,
+        lora_dropout=0.1,
+        target_modules='.*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$',
+        use_dora=False if USE_QLORA else True,
+        init_lora_weights="gaussian"
+    )
+    if USE_QLORA:
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16
+        )
+
+MODELS = {
+    "idefics2-8b-vqarad-delta": Idefics2ForConditionalGeneration.from_pretrained(
+        "jihadzakki/idefics2-8b-vqarad-delta",
+        torch_dtype=torch.float16,
+        quantization_config=bnb_config if USE_QLORA else None,
+    )
+}
+
 PROCESSOR = AutoProcessor.from_pretrained(
     "HuggingFaceM4/idefics2-8b",
 )
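Note: the new block defines lora_config but never attaches it to the model, and the added IdeficsForVisionText2Text import is unused, so at inference time only the 4-bit NF4 quantization takes effect. For reference, a sketch of how the adapter config would be applied at fine-tuning time with peft (hypothetical, not part of this commit):

    from peft import get_peft_model

    model = MODELS["idefics2-8b-vqarad-delta"]
    model = get_peft_model(model, lora_config)  # wraps the matched target modules with LoRA adapters
    model.print_trainable_parameters()  # sanity check: only adapter weights are trainable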
@@ -58,116 +86,6 @@ SYSTEM_PROMPT = [
         ],
     }
 ]
-examples_path = os.path.dirname(__file__)
-EXAMPLES = [
-    [
-        {
-            "text": "For 2024, the interest expense is twice what it was in 2014, and the long-term debt is 10% higher than its 2015 level. Can you calculate the combined total of the interest and long-term debt for 2024?",
-            "files": [f"{examples_path}/example_images/mmmu_example_2.png"],
-        }
-    ],
-    [
-        {
-            "text": "What's in the image?",
-            "files": [f"{examples_path}/example_images/plant_bulb.webp"],
-        }
-    ],
-    [
-        {
-            "text": "Describe the image",
-            "files": [f"{examples_path}/example_images/baguettes_guarding_paris.png"],
-        }
-    ],
-    [
-        {
-            "text": "Read what's written on the paper",
-            "files": [f"{examples_path}/example_images/paper_with_text.png"],
-        }
-    ],
-    [
-        {
-            "text": "The respective main characters of these two movies meet in real life. Imagine their discussion. It should be sassy, and the beginning of a mysterious adventure.",
-            "files": [f"{examples_path}/example_images/barbie.jpeg", f"{examples_path}/example_images/oppenheimer.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Can you explain this meme?",
-            "files": [f"{examples_path}/example_images/running_girl_meme.webp"],
-        }
-    ],
-    [
-        {
-            "text": "What happens to fish if pelicans increase?",
-            "files": [f"{examples_path}/example_images/ai2d_example_2.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Give an art-critic description of this well known painting",
-            "files": [f"{examples_path}/example_images/Van-Gogh-Starry-Night.jpg"],
-        }
-    ],
-    [
-        {
-            "text": "Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend?",
-            "files": [f"{examples_path}/example_images/mmmu_example.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Write an online ad for that product.",
-            "files": [f"{examples_path}/example_images/shampoo.jpg"],
-        }
-    ],
-    [
-        {
-            "text": "Describe this image in detail and explain why it is disturbing.",
-            "files": [f"{examples_path}/example_images/cat_cloud.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Why is this image cute?",
-            "files": [
-                f"{examples_path}/example_images/kittens-cats-pet-cute-preview.jpg"
-            ],
-        }
-    ],
-    [
-        {
-            "text": "What is formed by the deposition of either the weathered remains of other rocks?",
-            "files": [f"{examples_path}/example_images/ai2d_example.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "What's funny about this image?",
-            "files": [f"{examples_path}/example_images/pope_doudoune.webp"],
-        }
-    ],
-    [
-        {
-            "text": "Can this happen in real life?",
-            "files": [f"{examples_path}/example_images/elephant_spider_web.webp"],
-        }
-    ],
-    [
-        {
-            "text": "What's unusual about this image?",
-            "files": [f"{examples_path}/example_images/dragons_playing.png"],
-        }
-    ],
-    [
-        {
-            "text": "Why is that image comical?",
-            "files": [f"{examples_path}/example_images/eye_glasses.jpeg"],
-        }
-    ],
-]
-
-BOT_AVATAR = "IDEFICS_logo.png"
-
 
 # Chatbot utils
 def turn_is_pure_media(turn):
@@ -265,7 +183,7 @@ def extract_images_from_msg_list(msg_list):
     return all_images
 
 
-@spaces.GPU(duration=180)
+# @spaces.GPU(duration=180)
 def model_inference(
     user_prompt,
     chat_history,
@@ -324,12 +242,6 @@ def model_inference(
     inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
     generation_args.update(inputs)
 
-    # # The regular non streaming generation mode
-    # _ = generation_args.pop("streamer")
-    # generated_ids = MODELS[model_selector].generate(**generation_args)
-    # generated_text = PROCESSOR.batch_decode(generated_ids[:, generation_args["input_ids"].size(-1): ], skip_special_tokens=True)[0]
-    # return generated_text
-
     # The streaming generation mode
     thread = Thread(
         target=MODELS[model_selector].generate,
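Note: with the commented-out non-streaming branch removed, generation always goes through the streaming path kept below: generate() runs on a worker thread and decoded text is consumed from the TextIteratorStreamer. A minimal sketch of that pattern in isolation (hypothetical, assuming a loaded model, processor, and tokenized inputs):

    from threading import Thread
    from transformers import TextIteratorStreamer

    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(target=model.generate, kwargs={**inputs, "streamer": streamer, "max_new_tokens": 64})
    thread.start()
    text = ""
    for chunk in streamer:  # yields decoded text pieces as they are generated
        text += chunk
    thread.join()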
@@ -414,8 +326,8 @@ top_p = gr.Slider(
 
 
 chatbot = gr.Chatbot(
-    label="Idefics2-Chatty",
-    avatar_images=[None, BOT_AVATAR],
+    label="idefics2-8b-vqarad-delta",
+    # avatar_images=[None, BOT_AVATAR],
     height=450,
 )
@@ -464,7 +376,7 @@ with gr.Blocks(
     gr.ChatInterface(
         fn=model_inference,
         chatbot=chatbot,
-        examples=EXAMPLES,
+        # examples=EXAMPLES,
        multimodal=True,
         cache_examples=False,
         additional_inputs=[
@@ -477,4 +389,4 @@ with gr.Blocks(
         ],
     )
 
-demo.launch()
+demo.launch()
 
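For reference, a minimal end-to-end sketch of the reworked loading path (hypothetical snippet, not part of the commit; it assumes a CUDA device, the jihadzakki/idefics2-8b-vqarad-delta checkpoint introduced above, and a local image file chest_xray.png as a stand-in input):

    import torch
    from PIL import Image
    from transformers import AutoProcessor, BitsAndBytesConfig, Idefics2ForConditionalGeneration

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    model = Idefics2ForConditionalGeneration.from_pretrained(
        "jihadzakki/idefics2-8b-vqarad-delta",
        torch_dtype=torch.float16,
        quantization_config=bnb_config,  # 4-bit NF4 weights, fp16 compute
    )
    processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")

    # Build a single-turn multimodal prompt the same way the app does.
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": "What does this X-ray show?"},
        ]}
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[Image.open("chest_xray.png")], return_tensors="pt").to("cuda")

    generated_ids = model.generate(**inputs, max_new_tokens=64)
    # Decode only the newly generated tokens, mirroring the app's streaming path.
    print(processor.batch_decode(generated_ids[:, inputs["input_ids"].size(-1):], skip_special_tokens=True)[0])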