# Catalogue of built-in audio tasks. Every entry is a dict with the same four
# string keys:
#   query_text           - the natural-language request for the task.
#   doc_text             - a longer description of the task
#                          (presumably used to match free-form user queries
#                          against a task -- confirm against the caller).
#   response_prefix_text - text ending in ": " (presumably prepended to the
#                          generated answer -- confirm against the caller).
#   ui_text              - short lowercase label for the task
#                          (presumably what the UI displays -- confirm).
#
# NOTE(review): spelling fixed in three runtime strings ("trancription",
# "natinoality", "happended"). If any code compares against the old misspelled
# literals, update it together with this block.
STANDARD_QUERIES = [
    {
        "query_text": "Please transcribe this speech.",
        "doc_text": "Listen to a speech and write down exactly what is being said in text form. It's essentially converting spoken words into written words. Provide the exact transcription of the given audio. Record whatever the speaker has said into written text.",
        "response_prefix_text": "The transcription of the speech is: ",
        "ui_text": "speech transcription",
    },
    {
        "query_text": "Please describe what happened in this audio",
        "doc_text": "Text captions describing the sound events and environments in the audio clips, describing the events and actions happened in the audio.",
        "response_prefix_text": "Events in this audio clip: ",
        "ui_text": "audio caption",
    },
    {
        "query_text": "May I know the gender of the speakers",
        "doc_text": "Identify the gender, male or female, based on pitch, formants, harmonics, and prosody features, and other speech pattern differences between genders.",
        "response_prefix_text": "By analyzing pitch, formants, harmonics, and prosody features, which reflect physiological and speech pattern differences between genders: ",
        "ui_text": "gender recognition",
    },
    {
        "query_text": "May I know the nationality of the speakers",
        "doc_text": "Discover speakers' nationality, country, or the place he is coming from, from his/her accent, pronunciation patterns, and other language-specific speech features influenced by cultural and linguistic backgrounds.",
        "response_prefix_text": "By analyzing accent, pronunciation patterns, intonation, rhythm, phoneme usage, and language-specific speech features influenced by cultural and linguistic backgrounds: ",
        "ui_text": "nationality recognition",
    },
    {
        "query_text": "Can you guess which ethnic group this person is from based on their accent.",
        "doc_text": "Discover speakers' ethnic group, home country, or the place he is coming from, from his/her accent, tone, and other vocal characteristics influenced by cultural, regional, and linguistic factors.",
        "response_prefix_text": "By analyzing speech features like accent, tone, intonation, phoneme variations, and vocal characteristics influenced by cultural, regional, and linguistic factors: ",
        "ui_text": "ethnic group recognition",
    },
    {
        "query_text": "What do you think the speakers are feeling.",
        "doc_text": "What do you think the speakers are feeling. Please identify speakers' emotions by analyzing vocal features like pitch, tone, volume, speech rate, rhythm, and spectral energy, which reflect emotional states such as happiness, anger, sadness, or fear.",
        "response_prefix_text": "By analyzing vocal features like pitch, tone, volume, speech rate, rhythm, and spectral energy: ",
        "ui_text": "emotion recognition",
    },
]