lukecq committed
Commit aec1ff0 · verified · 1 Parent(s): 5cda5a4

Update app.py

Files changed (1)
  1. app.py +55 -88
app.py CHANGED
@@ -8,22 +8,40 @@ import os, json
from sys import argv
from vllm import LLM, SamplingParams

- print(gr.__version__)
+ from huggingface_hub import login
+ TOKEN = os.environ.get("TOKEN", None)
+ login(token=TOKEN)

def load_model_processor(model_path):
    processor = AutoProcessor.from_pretrained(model_path)
    llm = LLM(
-         model=model_path, trust_remote_code=True, gpu_memory_utilization=0.8,
+         model=model_path, trust_remote_code=True, gpu_memory_utilization=0.4,
        enforce_eager=True, device = "cuda",
        limit_mm_per_prompt={"audio": 5},
    )
    return llm, processor

- model_path1 = "Qwen/Qwen2-Audio-7B-Instruct" #argv[1]
+ model_path1 = "SeaLLMs/SeaLLMs-Audio-7B"
model1, processor1 = load_model_processor(model_path1)

- def response_to_audio_conv(conversation, model=None, processor=None, temperature = 0.1,repetition_penalty=1.1, top_p = 0.9,
-                            max_new_tokens = 2048):
+ def response_to_audio(audio_url, text, model=None, processor=None, temperature = 0.1,repetition_penalty=1.1, top_p = 0.9,max_new_tokens = 2048):
+     if text == None:
+         conversation = [
+             {"role": "user", "content": [
+                 {"type": "audio", "audio_url": audio_url},
+             ]},]
+     elif audio_url == None:
+         conversation = [
+             {"role": "user", "content": [
+                 {"type": "text", "text": text},
+             ]},]
+     else:
+         conversation = [
+             {"role": "user", "content": [
+                 {"type": "audio", "audio_url": audio_url},
+                 {"type": "text", "text": text},
+             ]},]
+
    text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
    audios = []
    for message in conversation:
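
Note on the new response_to_audio helper: the hunk above ends at the unchanged middle of the function (old lines 30-51 / new lines 48-69), where the audio is actually decoded and the vLLM engine is called. That code is not shown in this diff, so the sketch below is only a plausible shape for it, assuming the usual vLLM multimodal pattern (librosa for decoding, SamplingParams for generation settings, llm.generate with multi_modal_data). The helper name _generate and every call in it are illustrative, not taken from the commit.

# Hedged sketch only; the commit's actual (unchanged) generation code is not shown in this diff.
import librosa
from vllm import SamplingParams

def _generate(llm, processor, conversation, temperature=0.1,
              repetition_penalty=1.1, top_p=0.9, max_new_tokens=2048):
    # Render the chat template to a single prompt string.
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)

    # Collect (waveform, sampling_rate) tuples for every audio item in the conversation,
    # resampled to the processor's expected rate.
    audios = []
    for message in conversation:
        if isinstance(message["content"], list):
            for item in message["content"]:
                if item["type"] == "audio":
                    audios.append(librosa.load(item["audio_url"],
                                               sr=processor.feature_extractor.sampling_rate))

    sampling_params = SamplingParams(
        temperature=temperature, top_p=top_p,
        repetition_penalty=repetition_penalty, max_tokens=max_new_tokens,
    )
    inputs = {"prompt": prompt}
    if audios:
        inputs["multi_modal_data"] = {"audio": audios}

    # vLLM returns one RequestOutput per prompt; take the first candidate's text.
    output = llm.generate([inputs], sampling_params=sampling_params)[0]
    return output.outputs[0].text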
@@ -52,70 +70,15 @@ def response_to_audio_conv(conversation, model=None, processor=None, temperature
    response = output.outputs[0].text
    return response

- def print_like_dislike(x: gr.LikeData):
-     print(x.index, x.value, x.liked)
-
- def add_message(history, message):
-     paths = []
-     for turn in history:
-         if turn['role'] == "user" and type(turn['content']) != str:
-             paths.append(turn['content'][0])
-     for x in message["files"]:
-         if x not in paths:
-             history.append({"role": "user", "content": {"path": x}})
-     if message["text"] is not None:
-         history.append({"role": "user", "content": message["text"]})
-     return history, gr.MultimodalTextbox(value=None, interactive=False)
-
- def format_user_messgae(message):
-     if type(message['content']) == str:
-         return {"role": "user", "content": [{"type": "text", "text": message['content']}]}
-     else:
-         return {"role": "user", "content": [{"type": "audio", "audio_url": message['content'][0]}]}
-
- def history_to_conversation(history):
-     conversation = []
-     audio_paths = []
-     for turn in history:
-         if turn['role'] == "user":
-             if not turn['content']:
-                 continue
-             turn = format_user_messgae(turn)
-             if turn['content'][0]['type'] == 'audio':
-                 if turn['content'][0]['audio_url'] in audio_paths:
-                     continue
-                 else:
-                     audio_paths.append(turn['content'][0]['audio_url'])
-
-             if len(conversation) > 0 and conversation[-1]["role"] == "user":
-                 conversation[-1]['content'].append(turn['content'][0])
-             else:
-                 conversation.append(turn)
-         else:
-             conversation.append(turn)
-
-     print(json.dumps(conversation, indent=4, ensure_ascii=False))
-     return conversation
-
- def bot(history: list, temperature = 0.1,repetition_penalty=1.1, top_p = 0.9,
-         max_new_tokens = 2048):
-     conversation = history_to_conversation(history)
-     response = response_to_audio_conv(conversation, model=model1, processor=processor1, temperature = temperature,repetition_penalty=repetition_penalty, top_p = top_p, max_new_tokens = max_new_tokens)
-     # response = "Nice to meet you!"
-     print("Bot:",response)
-
-     history.append({"role": "assistant", "content": ""})
-     for character in response:
-         history[-1]["content"] += character
-         time.sleep(0.01)
-         yield history
-
- insturctions = """**Instruction**: there are three input format:
- 1. text: input text message only
- 2. audio: upload audio file or record a voice message
- 3. audio + text: record a voice message and input text message"""
+ def clear_inputs():
+     return None, "", ""
+
+ def compare_responses(audio_url, text):
+     response1 = response_to_audio(audio_url, text, model1, processor1)
+     return response1

with gr.Blocks() as demo:
+     # gr.Markdown(f"Evaluate {model_path1}")
    # gr.Markdown("""<p align="center"><img src="images/seal_logo.png" style="height: 80px"/><p>""")
    # gr.Image("images/seal_logo.png", elem_id="seal_logo", show_label=False,height=80,show_fullscreen_button=False)
    gr.Markdown(
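
The new clear_inputs and compare_responses helpers added in this hunk are plain functions over the globally loaded model1/processor1, so they can be smoke-tested without launching the Gradio UI. A minimal, hypothetical check (the audio path is a placeholder, not part of the commit):

# Hypothetical smoke test; "sample.wav" is a placeholder path.
if __name__ == "__main__":
    print(compare_responses("sample.wav", "What is being said in this clip?"))  # audio + text branch
    print(compare_responses(None, "Hello, who are you?"))                       # text-only branch
    print(clear_inputs())                                                       # -> (None, "", "") to reset the widgets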
@@ -148,31 +111,35 @@ with gr.Blocks() as demo:
    # top_p = gr.Slider(minimum=0.1, maximum=1, value=0.5, step=0.1, label="Top P")
    # with gr.Column():
    # repetition_penalty = gr.Slider(minimum=0, maximum=2, value=1.1, step=0.1, label="Repetition Penalty")
-     chatbot = gr.Chatbot(elem_id="chatbot", bubble_full_width=False, type="messages")
-
-     chat_input = gr.MultimodalTextbox(
-         interactive=True,
-         file_count="single",
-         file_types=['.wav'],
-         placeholder="Enter message (optional) ...",
-         show_label=False,
-         sources=["microphone", "upload"],
+
+     with gr.Row():
+         with gr.Column():
+             # mic_input = gr.Microphone(label="Record Audio", type="filepath", elem_id="mic_input")
+             mic_input = gr.Audio(sources = ['upload', 'microphone'], label="Record Audio", type="filepath", elem_id="mic_input")
+         with gr.Column():
+             additional_input = gr.Textbox(label="Text Input")
+
+     # Button to trigger the function
+     with gr.Row():
+         btn_submit = gr.Button("Submit")
+         btn_clear = gr.Button("Clear")
+
+     with gr.Row():
+         output_text1 = gr.Textbox(label=model_path1.split('/')[-1], interactive=False, elem_id="output_text1")
+
+     btn_submit.click(
+         fn=response_to_audio,
+         inputs=[mic_input, additional_input],
+         outputs=[output_text1],
    )

-     chat_msg = chat_input.submit(
-         add_message, [chatbot, chat_input], [chatbot, chat_input]
+     btn_clear.click(
+         fn=clear_inputs,
+         inputs=None,
+         outputs=[mic_input, additional_input, output_text1],
+         queue=False,
    )
-     bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
-     # bot_msg = chat_msg.then(bot, [chatbot, temperature, repetition_penalty, top_p], chatbot, api_name="bot_response")
-     bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])
-
-     # chatbot.like(print_like_dislike, None, None, like_user_message=True)

-     clear_button = gr.ClearButton([chatbot, chat_input])
-
-     # PORT = 7950
-     # demo.launch(server_port=PORT, show_api = True, allowed_paths = [],
-     #             root_path = f"https://dsw-gateway.alibaba-inc.com/dsw81322/proxy/{PORT}/")

demo.launch(
    share=False,
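
As committed, btn_submit.click binds fn=response_to_audio with only [mic_input, additional_input] as inputs, so the model and processor parameters keep their None defaults; compare_responses is the wrapper in this commit that fills in model1 and processor1. The sketch below is a hedged alternative wiring, not what the commit does: it routes the click through that wrapper.

# Sketch only (not the commit's wiring): inside the `with gr.Blocks() as demo:` block,
# route the click through compare_responses so model1/processor1 are bound.
btn_submit.click(
    fn=compare_responses,
    inputs=[mic_input, additional_input],
    outputs=[output_text1],
)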