Commit
·
b457cd5
1
Parent(s):
8c1e172
Update app.py
Browse files
app.py
CHANGED
@@ -26,17 +26,28 @@ def upload_audio(audio_path):
|
|
26 |
except:
|
27 |
return None
|
28 |
|
29 |
-
def
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
38 |
|
|
|
39 |
if model == '7B (Default)':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
|
41 |
'audio_path': audio_path, 'question': question
|
42 |
})
|
@@ -45,6 +56,14 @@ def predict(audio_path, question, model):
|
|
45 |
return ans_str_7b
|
46 |
|
47 |
if model == '13B (Beta)':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
|
49 |
'audio_path': audio_path, 'question': question
|
50 |
})
|
@@ -62,7 +81,7 @@ if __name__ == '__main__':
|
|
62 |
demo = gr.Interface(fn=predict,
|
63 |
inputs=[gr.Audio(type="filepath"),
|
64 |
gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?', label='Edit the textbox to ask your own questions!'),
|
65 |
-
gr.Radio(["7B (Default)", "13B (Beta)"], value='7B (Default)', label="LLM size", info="All experiments
|
66 |
outputs=[gr.Textbox(label="LTU-AS-Output")],
|
67 |
cache_examples=True,
|
68 |
title="Demo of LTU-AS",
|
|
|
26 |
except:
|
27 |
return None
|
28 |
|
29 |
+
def upload_audio_13b(audio_path):
    """Upload an audio file to the 13B model server.

    Args:
        audio_path: Local filesystem path of the audio file to upload.

    Returns:
        'size' if is_file_larger_than_30mb(audio_path) reports the file as
        too large; the "path" field of the server's JSON reply when the
        upload returns HTTP 200; None when the upload fails for any reason
        (missing/unreadable file, network error, non-200 status, or a reply
        without a "path" field).
    """
    try:
        if is_file_larger_than_30mb(audio_path):
            return 'size'
        with open(audio_path, 'rb') as audio_file:
            response = requests.post(
                'http://sls-titan-5.csail.mit.edu:8080/upload/',
                files={'audio_file': audio_file},
            )
        if response.status_code == 200:
            return response.json()["path"]
    except Exception:
        # Deliberately best-effort: callers treat None as "no usable upload".
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        return None
    # Non-200 reply: signal failure explicitly instead of falling off the end.
    return None
|
40 |
|
41 |
+
def predict(audio_path, question, model):
|
42 |
if model == '7B (Default)':
|
43 |
+
upload_statues = upload_audio(audio_path)
|
44 |
+
if upload_statues == None:
|
45 |
+
return 'Please upload an audio file.'
|
46 |
+
if upload_statues == 'size':
|
47 |
+
return 'This demo does not support audio file size larger than 30MB.'
|
48 |
+
if question == '':
|
49 |
+
return 'Please ask a question.'
|
50 |
+
print(audio_path, question)
|
51 |
response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
|
52 |
'audio_path': audio_path, 'question': question
|
53 |
})
|
|
|
56 |
return ans_str_7b
|
57 |
|
58 |
if model == '13B (Beta)':
|
59 |
+
upload_statues = upload_audio_13b(audio_path)
|
60 |
+
if upload_statues == None:
|
61 |
+
return 'Please upload an audio file.'
|
62 |
+
if upload_statues == 'size':
|
63 |
+
return 'This demo does not support audio file size larger than 30MB.'
|
64 |
+
if question == '':
|
65 |
+
return 'Please ask a question.'
|
66 |
+
print(audio_path, question)
|
67 |
response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
|
68 |
'audio_path': audio_path, 'question': question
|
69 |
})
|
|
|
81 |
demo = gr.Interface(fn=predict,
|
82 |
inputs=[gr.Audio(type="filepath"),
|
83 |
gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?', label='Edit the textbox to ask your own questions!'),
|
84 |
+
gr.Radio(["7B (Default)", "13B (Beta)"], value='7B (Default)', label="LLM size", info="All experiments are 7B LLM.")],
|
85 |
outputs=[gr.Textbox(label="LTU-AS-Output")],
|
86 |
cache_examples=True,
|
87 |
title="Demo of LTU-AS",
|