Commit
·
ad0f220
1
Parent(s):
9a72e52
Update app.py
Browse files
app.py
CHANGED
@@ -26,7 +26,7 @@ def upload_audio(audio_path):
|
|
26 |
except:
|
27 |
return None
|
28 |
|
29 |
-
def predict(audio_path, question):
|
30 |
upload_statues = upload_audio(audio_path)
|
31 |
if upload_statues == None:
|
32 |
return 'Please upload an audio file.'
|
@@ -35,18 +35,22 @@ def predict(audio_path, question):
|
|
35 |
if question == '':
|
36 |
return 'Please ask a question.'
|
37 |
print(audio_path, question)
|
38 |
-
response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
|
39 |
-
'audio_path': audio_path, 'question': question
|
40 |
-
})
|
41 |
-
answer_7b = json.loads(response.content)
|
42 |
-
ans_str_7b = answer_7b['output']
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
if __name__ == '__main__':
|
52 |
link = "https://github.com/YuanGongND/ltu"
|
@@ -58,8 +62,9 @@ if __name__ == '__main__':
|
|
58 |
demo = gr.Interface(fn=predict,
|
59 |
inputs=[gr.Audio(type="filepath"),
|
60 |
gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
|
61 |
-
label='Edit the textbox to ask your own questions!')
|
62 |
-
|
|
|
63 |
cache_examples=True,
|
64 |
title="Demo of LTU-AS",
|
65 |
description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +
|
|
|
26 |
except:
|
27 |
return None
|
28 |
|
29 |
+
def predict(audio_path, question, model):
    """Send an uploaded audio file and a question to an LTU-AS inference server.

    Parameters
    ----------
    audio_path : str or None
        Filesystem path of the uploaded audio (Gradio ``type="filepath"`` input).
    question : str
        The user's question about the audio.
    model : str
        Backend selector from the Gradio radio: ``'7B (Default)'`` or ``'13B (Beta)'``.

    Returns
    -------
    str or None
        The model's answer string, a user-facing prompt when input is missing,
        or ``None`` when ``model`` matches neither known option (the original
        code fell through silently in that case).
    """
    upload_status = upload_audio(audio_path)
    if upload_status is None:  # upload failed or no file was provided
        return 'Please upload an audio file.'
    if question == '':
        return 'Please ask a question.'
    print(audio_path, question)

    # Both backends expose the same request/response schema; only the host
    # differs, so dispatch via a lookup table instead of duplicated branches.
    # NOTE(review): fixes a SyntaxError in the original — the 13B branch read
    # `if model == '13B (Beta)'` with no trailing colon.
    endpoints = {
        '7B (Default)': 'http://sls-titan-6.csail.mit.edu:8080/items/0',
        '13B (Beta)': 'http://sls-titan-5.csail.mit.edu:8080/items/0',
    }
    url = endpoints.get(model)
    if url is None:
        return None  # unknown model choice; preserves original fall-through
    response = requests.put(url, json={
        'audio_path': audio_path, 'question': question
    })
    answer = json.loads(response.content)
    return answer['output']
|
54 |
|
55 |
if __name__ == '__main__':
|
56 |
link = "https://github.com/YuanGongND/ltu"
|
|
|
62 |
demo = gr.Interface(fn=predict,
|
63 |
inputs=[gr.Audio(type="filepath"),
|
64 |
gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
|
65 |
+
label='Edit the textbox to ask your own questions!'),
|
66 |
+
gr.Radio(["7B (Default)", "13B (Beta)"], value='7B (Default)', label="LLM size", info="All experiments are 7B LLM.")]
|
67 |
+
outputs=[gr.Textbox(label="LTU-AS-Output")],
|
68 |
cache_examples=True,
|
69 |
title="Demo of LTU-AS",
|
70 |
description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +
|