Spaces:

romas-458
/

asr

Runtime error

roman committed on Jun 8, 2024

Commit

9c6f6ce

1 Parent(s): 6775bac

reset state of output

Files changed (1) hide show

app.py CHANGED Viewed

@@ -65,7 +65,7 @@ def decode_predictions(logits_list):
     return current_hypotheses[0]
-def transcribe(audio, state):
     if state is None:
         state = [np.array([], dtype=np.float32), []]
@@ -88,6 +88,31 @@ def transcribe(audio, state):
         text = decode_predictions(state[1])
     return text, state
 gr.Interface(
     fn=transcribe,
@@ -95,7 +120,8 @@ gr.Interface(
         # gr.Audio(source="upload", type="filepath", streaming=True),
         gr.Audio(source="upload", type="filepath"),
         # "state"
-        gr.State(None)
     ],
     outputs=[
         "textbox",

     return current_hypotheses[0]
+def transcribe_(audio, state):
     if state is None:
         state = [np.array([], dtype=np.float32), []]
         text = decode_predictions(state[1])
     return text, state
+def transcribe(audio, state, reset_state):
+    if reset_state:
+        state = [np.array([], dtype=np.float32), []]
+    if state is None:
+        state = [np.array([], dtype=np.float32), []]
+    audio_16k = resample(audio)
+    # join to audio sequence
+    state[0] = np.concatenate([state[0], audio_16k])
+    while (len(state[0]) > total_buffer):
+        buffer = state[0][:total_buffer]
+        state[0] = state[0][total_buffer - overhead_len:]
+        # run model
+        logits = model(buffer)
+        # add logits
+        state[1].append(logits)
+    if len(state[1]) == 0:
+        text = ""
+    else:
+        text = decode_predictions(state[1])
+    return text, state
 gr.Interface(
     fn=transcribe,
         # gr.Audio(source="upload", type="filepath", streaming=True),
         gr.Audio(source="upload", type="filepath"),
         # "state"
+        gr.State(None),
+        gr.Button(text="Reset State", label="Reset State")
     ],
     outputs=[
         "textbox",