Spaces:

aikitty
/

testing-sandbox-huggingsound

Runtime error

aikitty committed on May 30, 2024

Commit

dc7d091

verified ·

1 Parent(s): 2ffbef3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,37 @@
-from huggingsound import SpeechRecognitionModel
-model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn")
-audio_paths = ["/path/to/file.mp3", "/path/to/another_file.wav"]
-transcriptions = model.transcribe(audio_paths)

+import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import soundfile as sf
+import gradio as gr
+# Load the pre-trained processor and model
+processor = Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn")
+model = Wav2Vec2ForCTC.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn")
+def speech_to_text(audio):
+    # Load audio file
+    speech, sample_rate = sf.read(audio)
+    # Preprocess the audio file
+    inputs = processor(speech, sampling_rate=sample_rate, return_tensors="pt", padding=True)
+    # Perform inference
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    # Decode the predicted ids to text
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)
+    return transcription[0]
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=speech_to_text,
+    inputs=gr.inputs.Audio(source="upload", type="filepath"),
+    outputs="text",
+    title="Chinese Speech Recognition",
+    description="Upload an audio file and get the transcribed text using the wav2vec2-large-xlsr-53-chinese-zh-cn model."
+)
+if __name__ == "__main__":
+    iface.launch()