skit-ai
/

speechllm-2B

Feature Extraction

speech-language

Model card Files Files and versions Community

shangeth committed on Jun 25, 2024

Commit

6a10328

·

verified ·

1 Parent(s): 4d00091

Update README.md

Files changed (1) hide show

README.md +1 -1

README.md CHANGED Viewed

@@ -122,7 +122,7 @@ model = AutoModel.from_pretrained("skit-ai/speechllm-2B", trust_remote_code=True
 model.generate_meta(
 	audio_path="path-to-audio.wav", #16k Hz, mono
-    audio_tensor=torchaudio.load("path-to-audio.wav")[2], # [Optional] either audio_path or audio_tensor directly
 	instruction="Give me the following information about the audio [SpeechActivity, Transcript, Gender, Emotion, Age, Accent]",
 	max_new_tokens=500,
 	return_special_tokens=False

 model.generate_meta(
 	audio_path="path-to-audio.wav", #16k Hz, mono
+    audio_tensor=torchaudio.load("path-to-audio.wav")[0], # [Optional] either audio_path or audio_tensor directly
 	instruction="Give me the following information about the audio [SpeechActivity, Transcript, Gender, Emotion, Age, Accent]",
 	max_new_tokens=500,
 	return_special_tokens=False