Dongxu Li committed
Commit 1365f85
Parents: 202fdfa, 2f872f9

Merge branch 'main' of https://huggingface.co/spaces/Salesforce/BLIP2

Files changed (3)
  1. app.py +18 -12
  2. flower.jpg +0 -0
  3. forbidden_city.webp +0 -0
app.py CHANGED
@@ -126,14 +126,16 @@ def inference_caption(
 
 
 title = """<h1 align="center">BLIP-2</h1>"""
-description = """Gradio demo for BLIP-2, a multimodal chatbot from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them. Please visit our <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'>project webpage</a>.</p>
+description = """Gradio demo for BLIP-2, image-to-text generation from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them. Please visit our <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'>project webpage</a>.</p>
 <p> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected. </p>"""
-article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.12086' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>"
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2301.12597' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>"
 
 endpoint = Endpoint()
 
 examples = [
     ["house.png", "How could someone get out of the house?"],
+    ["flower.jpg", "Question: What is this flower and where is its origin? Answer:"],
+    ["forbidden_city.webp", "In what dynasties was this place built?"],
     # [
     #     "sunset.png",
     #     "Write a romantic message that goes along this photo.",
@@ -162,30 +164,31 @@ with gr.Blocks() as iface:
             minimum=0.5,
             maximum=1.0,
             value=0.8,
+            step=0.1,
             interactive=True,
-            label="Temperature",
+            label="Temperature (used with nucleus sampling)",
         )
 
         len_penalty = gr.Slider(
-            minimum=-2.0,
+            minimum=-1.0,
             maximum=2.0,
             value=1.0,
-            step=0.5,
+            step=0.2,
             interactive=True,
-            label="Length Penalty",
+            label="Length Penalty (set to larger for longer sequence, used with beam search)",
         )
 
         rep_penalty = gr.Slider(
             minimum=1.0,
-            maximum=20.0,
-            value=10.0,
+            maximum=5.0,
+            value=1.5,
             step=0.5,
             interactive=True,
-            label="Repeat Penalty",
+            label="Repeat Penalty (larger value prevents repetition)",
         )
 
     with gr.Row():
-        caption_output = gr.Textbox(lines=2, label="Caption Output")
+        caption_output = gr.Textbox(lines=2, label="Caption Output (from OPT)")
         caption_button = gr.Button(
             value="Caption it!", interactive=True, variant="primary"
         )
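This hunk tightens the decoding controls: the temperature slider gains a 0.1 step and is relabeled as a nucleus-sampling knob, the length penalty range narrows from [-2.0, 2.0] to [-1.0, 2.0] (it only takes effect under beam search), and the repeat penalty drops from a 1.0-20.0 range defaulting to 10.0 to a more conventional 1.0-5.0 range defaulting to 1.5. A hedged sketch of how sliders like these typically map onto Hugging Face generate() arguments (the app's actual Endpoint call is not shown in this diff, so the wiring below is an assumption):

def build_gen_kwargs(use_nucleus_sampling, temperature=0.8, len_penalty=1.0, rep_penalty=1.5):
    """Hypothetical mapping of the three sliders onto generate() kwargs."""
    if use_nucleus_sampling:
        # Temperature is only consulted when sampling is enabled (do_sample=True).
        return dict(do_sample=True, top_p=0.9, temperature=temperature,
                    repetition_penalty=rep_penalty, max_new_tokens=40)
    # length_penalty only matters with beam search (num_beams > 1); values
    # above 0.0 promote longer sequences, values below 0.0 shorter ones.
    return dict(num_beams=5, length_penalty=len_penalty,
                repetition_penalty=rep_penalty, max_new_tokens=40)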
@@ -205,7 +208,7 @@ with gr.Blocks() as iface:
         chat_input = gr.Textbox(lines=2, label="Chat Input")
 
     with gr.Row():
-        chatbot = gr.Chatbot()
+        chatbot = gr.Chatbot(label="Chat Output (from FlanT5)")
         image_input.change(lambda: (None, "", "", []), [], [chatbot, chat_input, caption_output, state])
 
     with gr.Row():
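The handler attached to image_input here is a reset pattern: uploading a new image clears the chat history, both text fields, and the conversation state in a single event. A minimal self-contained sketch of the same pattern (Gradio 3.x-style API, component names borrowed from this file):

import gradio as gr

with gr.Blocks() as demo:
    state = gr.State([])  # conversation history shared across turns
    image_input = gr.Image(type="pil")
    chat_input = gr.Textbox(lines=2, label="Chat Input")
    caption_output = gr.Textbox(lines=2, label="Caption Output")
    chatbot = gr.Chatbot()

    # One value is returned per output component, so a single lambda
    # resets the chatbot, both textboxes, and the state together.
    image_input.change(
        lambda: (None, "", "", []),
        inputs=[],
        outputs=[chatbot, chat_input, caption_output, state],
    )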
@@ -237,7 +240,10 @@ with gr.Blocks() as iface:
     examples = gr.Examples(
         examples=examples,
         inputs=[image_input, chat_input],
+        # outputs=[chatbot, state],
+        # run_on_click=True,
+        # fn = inference_chat,
     )
 
-iface.queue(concurrency_count=1, api_open=False, max_size=20)
+iface.queue(concurrency_count=1, api_open=False, max_size=10)
 iface.launch(enable_queue=True)
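The queue change halves the waiting room (max_size 20 to 10) while keeping a single worker and the queue's API routes closed. In the Gradio 3.x API this commit targets, those three knobs look like this (a sketch; only the max_size value actually changed here):

import gradio as gr

with gr.Blocks() as iface:
    gr.Markdown("placeholder UI")  # stand-in for the demo's real layout

iface.queue(
    concurrency_count=1,  # one request processed at a time
    api_open=False,       # close direct API routes so requests go through the queue
    max_size=10,          # reject new requests once 10 are already waiting
)
iface.launch(enable_queue=True)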
 
flower.jpg ADDED
forbidden_city.webp ADDED