DongfuJiang committed on
Commit
062730b
·
1 Parent(s): 7e5f599
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +10 -2
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Mllava
3
  emoji: πŸ‘
4
  colorFrom: green
5
  colorTo: yellow
 
1
  ---
2
+ title: Mantis
3
  emoji: πŸ‘
4
  colorFrom: green
5
  colorTo: yellow
app.py CHANGED
@@ -4,8 +4,8 @@ import time
4
  from PIL import Image
5
  from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
6
  from typing import List
7
- processor = MLlavaProcessor.from_pretrained("MFuyu/mllava_llava_debug_nlvr2_v5_4096")
8
- model = LlavaForConditionalGeneration.from_pretrained("MFuyu/mllava_llava_debug_nlvr2_v5_4096")
9
 
10
  @spaces.GPU
11
  def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
@@ -95,6 +95,10 @@ def bot(history):
95
 
96
  def build_demo():
97
  with gr.Blocks() as demo:
 
 
 
 
98
  chatbot = gr.Chatbot(line_breaks=True)
99
  chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
100
 
@@ -112,6 +116,10 @@ def build_demo():
112
  ).then(
113
  bot, chatbot, chatbot, api_name="bot_response"
114
  )
 
 
 
 
115
  return demo
116
 
117
 
 
4
  from PIL import Image
5
  from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
6
  from typing import List
7
+ processor = MLlavaProcessor.from_pretrained("TIGER-Lab/Mantis-llava-7b-v1.1")
8
+ model = LlavaForConditionalGeneration.from_pretrained("TIGER-Lab/Mantis-llava-7b-v1.1")
9
 
10
  @spaces.GPU
11
  def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
 
95
 
96
  def build_demo():
97
  with gr.Blocks() as demo:
98
+
99
+ gr.Markdown(""" # Mantis
100
+ Mantis is a multimodal conversational AI model that can chat with users about images and text. It's optimized for multi-image reasoning, where interleaved text and images can be used to generate responses.
101
+ """)
102
  chatbot = gr.Chatbot(line_breaks=True)
103
  chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
104
 
 
116
  ).then(
117
  bot, chatbot, chatbot, api_name="bot_response"
118
  )
119
+
120
+
121
+
122
+
123
  return demo
124
 
125