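"""Voice Chat page.

Records a user utterance, forwards it (together with the editable system prompt)
to the audio LLM backend, and renders up to MAX_VC_ROUNDS rounds of conversation.
"""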
import numpy as np
import streamlit as st

from src.generation import (
    prepare_multimodal_content,
    change_multimodal_content
)
from src.content.common import (
    MODEL_NAMES,
    VOICE_CHAT_DIALOGUE_STATES,
    reset_states,
    process_audio_bytes,
    init_state_section,
    header_section,
    sidebar_fragment,
    retrive_response_with_ui
)

# TODO: change this.
DEFAULT_PROMPT = "Based on the information in this user’s voice, please reply to the user in a friendly and helpful way."
MAX_VC_ROUNDS = 5

def bottom_input_section():
    bottom_cols = st.columns([0.03, 0.97])

    with bottom_cols[0]:
        # Clear the whole conversation.
        st.button(
            ':material/delete:',
            disabled=st.session_state.disprompt,
            on_click=lambda: reset_states(VOICE_CHAT_DIALOGUE_STATES)
        )

    with bottom_cols[1]:
        uploaded_file = st.audio_input(
            label="record audio",
            label_visibility="collapsed",
            disabled=st.session_state.disprompt,
            on_change=lambda: st.session_state.update(
                on_record=True,
                disprompt=True
            ),
            key='record'
        )

        if uploaded_file and st.session_state.on_record:
            # Convert the recorded bytes into an audio array and a base64 payload.
            audio_bytes = uploaded_file.read()
            st.session_state.vc_audio_array, st.session_state.vc_audio_base64 = \
                process_audio_bytes(audio_bytes)
            st.session_state.update(
                on_record=False,
            )

@st.fragment
def system_prompt_fragment():
    with st.expander("System Prompt"):
        st.text_area(
            label="Insert system instructions or background knowledge here.",
            label_visibility="collapsed",
            disabled=st.session_state.disprompt,
            max_chars=5000,
            key="system_prompt",
            value=DEFAULT_PROMPT,
        )

def conversation_section():
    chat_message_container = st.container(height=480)

    # Replay the visible chat history.
    for message in st.session_state.vc_messages:
        with chat_message_container.chat_message(message["role"]):
            if message.get("error"):
                st.error(message["error"])
            for warning_msg in message.get("warnings", []):
                st.warning(warning_msg)
            if message.get("audio", np.array([])).shape[0]:
                st.audio(message["audio"], format="audio/wav", sample_rate=16000)
            if message.get("content"):
                st.write(message["content"])

    with st._bottom:
        bottom_input_section()

    if not st.session_state.vc_audio_base64:
        return

    # Each round consists of one user message and one assistant message.
    if len(st.session_state.vc_messages) >= MAX_VC_ROUNDS * 2:
        st.toast(f":warning: max conversation rounds ({MAX_VC_ROUNDS}) reached!")
        return

    one_time_prompt = DEFAULT_PROMPT
    one_time_array = st.session_state.vc_audio_array
    one_time_base64 = st.session_state.vc_audio_base64

    # Consume the pending recording so a rerun does not resend it.
    st.session_state.update(
        vc_audio_array=np.array([]),
        vc_audio_base64="",
    )

    with chat_message_container.chat_message("user"):
        st.audio(one_time_array, format="audio/wav", sample_rate=16000)
    st.session_state.vc_messages.append({"role": "user", "audio": one_time_array})

    if not st.session_state.vc_model_messages:
        # First round: fold the system prompt into the first user turn.
        one_time_prompt = st.session_state.system_prompt
    else:
        # Later rounds: refresh the system prompt stored in the first model message.
        st.session_state.vc_model_messages[0]["content"] = change_multimodal_content(
            st.session_state.vc_model_messages[0]["content"],
            text_input=st.session_state.system_prompt
        )

    with chat_message_container.chat_message("assistant"):
        with st.spinner("Thinking..."):
            error_msg, warnings, response = retrive_response_with_ui(
                model_name=MODEL_NAMES["audiollm-it"]["vllm_name"],
                text_input=one_time_prompt,
                array_audio_input=one_time_array,
                base64_audio_input=one_time_base64,
                stream=True,
                history=st.session_state.vc_model_messages
            )

    st.session_state.vc_messages.append({
        "role": "assistant",
        "error": error_msg,
        "warnings": warnings,
        "content": response
    })

    mm_content = prepare_multimodal_content(one_time_prompt, one_time_base64)
    st.session_state.vc_model_messages.extend([
        {"role": "user", "content": mm_content},
        {"role": "assistant", "content": response}
    ])

    st.session_state.disprompt = False
    st.rerun(scope="app")

def voice_chat_page():
    init_state_section()
    header_section(
        component_name="Voice Chat",
        description=""" Currently supports up to <strong>5 rounds</strong> of conversation.
            Feel free to talk about anything.""",
        concise_description=" Currently supports up to <strong>5 rounds</strong> of conversation.",
        icon="🗣️"
    )

    with st.sidebar:
        sidebar_fragment()

    system_prompt_fragment()
    conversation_section()