# Music-To-Image / app.py
import gradio as gr
from gradio_client import Client
from pydub import AudioSegment

# Music captioning model (LP-Music-Caps), loaded directly from its Hugging Face Space
lpmc_client = gr.load("seungheondoh/LP-Music-Caps-demo", src="spaces")

# Llama 2 chat Space used for both the summary and the image-description prompt
client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
def cut_audio(input_path, output_path, max_duration=30000):
    """Truncate an audio file to at most max_duration milliseconds (default 30 s) and export it as MP3."""
    audio = AudioSegment.from_file(input_path)
    if len(audio) > max_duration:
        audio = audio[:max_duration]
    audio.export(output_path, format="mp3")
    return output_path
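
# Usage sketch (hypothetical filenames, not part of the app flow):
#   cut_audio("song.wav", "trunc_audio.mp3")
# returns "trunc_audio.mp3" containing at most the first 30 seconds of audio.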

def infer(audio_file):
    # Keep only the first 30 seconds so captioning stays fast
    truncated_audio = cut_audio(audio_file, "trunc_audio.mp3")

    # Step 1: caption the audio with LP-Music-Caps
    cap_result = lpmc_client(
        truncated_audio,  # str (filepath or URL to file) in 'audio_path' Audio component
        api_name="predict"
    )
    print(cap_result)

    # Step 2: ask Llama 2 for a single summary of the per-segment captions
    summarize_q = f"""
    I'll give you a list of music descriptions. Create a summary reflecting the musical ambiance.
    Do not process each segment, but provide a summary for the whole instead.
    Here's the list:
    {cap_result}
    """
    summary_result = client.predict(
        summarize_q,  # str in 'Message' Textbox component
        api_name="/chat_1"
    )
    print(f"SUMMARY: {summary_result}")

    # Step 3: turn the summary into one illustrative image description
    llama_q = f"""
    I'll give you a music description, then I want you to provide an illustrative image description that would fit well with the music.
    Answer with only one image description. Never do lists. Also specify in the description that we do not want photography.
    Here's the music description:
    {summary_result}
    """
    result = client.predict(
        llama_q,  # str in 'Message' Textbox component
        api_name="/chat_1"
    )
    print(result)

    return cap_result, result
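
# A minimal sketch of how the currently unused `img_result` component below could
# be fed, assuming a hypothetical text-to-image Space reached via gradio_client
# (the Space name and endpoint here are placeholders, not part of this app):
#
#   img_client = Client("some-user/text-to-image-space")  # hypothetical Space
#   image_path = img_client.predict(result, api_name="/predict")
#   return cap_result, result, image_path  # and add img_result to the outputs list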

with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        audio_input = gr.Audio(type="filepath", source="upload")
        infer_btn = gr.Button("Generate")
        lpmc_cap = gr.Textbox(label="LP-Music-Caps caption")
        llama_trans_cap = gr.Textbox(label="Llama translation")
        img_result = gr.Video(label="Result")  # declared but never wired to the click handler

    infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap])

demo.queue().launch()