MERaLiON-AudioLLM / src /generation.py
lingy's picture
init new space
50b6262
# Generation parameters held constant by this module (token cap, top-k,
# length penalty, seed). Where they are consumed is not visible here.
FIXED_GENERATION_CONFIG = {
    "max_completion_tokens": 1024,
    "top_k": 50,
    "length_penalty": 1.0,
    "seed": 42,
}
# Upper bound on accepted audio length.
# NOTE(review): unit is presumably seconds -- confirm against the caller.
MAX_AUDIO_LENGTH = 120
def prepare_multimodal_content(text_input, base64_audio_input):
    """Build the two-part content list for a text-plus-audio message.

    Args:
        text_input: Instruction text; it is embedded after the
            "Text instruction: " prefix.
        base64_audio_input: Audio payload placed verbatim into a
            ``data:audio/ogg;base64,`` URL. It is neither encoded nor
            validated here (presumably already base64 -- confirm at the
            call site).

    Returns:
        A new list whose item 0 is the text part and item 1 is the
        audio part.
    """
    text_part = {
        "type": "text",
        "text": f"Text instruction: {text_input}",
    }
    data_url = f"data:audio/ogg;base64,{base64_audio_input}"
    audio_part = {
        "type": "audio_url",
        "audio_url": {"url": data_url},
    }
    return [text_part, audio_part]
def change_multimodal_content(
        original_content,
        text_input="",
        base64_audio_input=""):
    """Overwrite the text and/or audio part of an existing content list.

    The list is modified in place and the same list object is returned.

    Args:
        original_content: Content list whose item 0 is the text part and
            item 1 is the audio part.
        text_input: New instruction text. When falsy, item 0 is left
            untouched.
        base64_audio_input: New audio payload for the
            ``data:audio/ogg;base64,`` URL. When falsy, item 1 is left
            untouched.

    Returns:
        ``original_content`` itself, after any replacements.
    """
    # Parts are addressed by list position: 0 holds text, 1 holds audio.
    if text_input:
        new_text = f"Text instruction: {text_input}"
        original_content[0] = {"type": "text", "text": new_text}

    if base64_audio_input:
        data_url = f"data:audio/ogg;base64,{base64_audio_input}"
        original_content[1] = {
            "type": "audio_url",
            "audio_url": {"url": data_url},
        }

    return original_content