sanchit-gandhi committed
Commit 77ac2cf · 1 Parent(s): 7bd1e74
Files changed (3)
  1. app.py +9 -9
  2. assets/example_1.wav +2 -2
  3. assets/example_2.wav +2 -2
app.py CHANGED
@@ -78,32 +78,32 @@ def transcribe(inputs):
         inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
 
         def _forward_distil_time(*args, **kwargs):
-            global distil_runtime
+            global distil_runtime_pipeline
             start_time = time.time()
             result = distil_pipe_forward(*args, **kwargs)
-            distil_runtime = time.time() - start_time
-            distil_runtime = round(distil_runtime, 2)
+            distil_runtime_pipeline = time.time() - start_time
+            distil_runtime_pipeline = round(distil_runtime_pipeline, 2)
             return result
 
         distil_pipe._forward = _forward_distil_time
         distil_text = distil_pipe(inputs.copy(), batch_size=BATCH_SIZE)["text"]
-        yield distil_text, distil_runtime, None, None
+        yield distil_text, distil_runtime_pipeline, None, None
 
         def _forward_time(*args, **kwargs):
-            global runtime
+            global runtime_pipeline
            start_time = time.time()
             result = pipe_forward(*args, **kwargs)
-            runtime = time.time() - start_time
-            runtime = round(runtime, 2)
+            runtime_pipeline = time.time() - start_time
+            runtime_pipeline = round(runtime_pipeline, 2)
             return result
 
         pipe._forward = _forward_time
         text = pipe(inputs, batch_size=BATCH_SIZE)["text"]
 
-        yield distil_text, distil_runtime, text, runtime
+        yield distil_text, distil_runtime_pipeline, text, runtime_pipeline
 
     else:
-        input_features = processor(inputs, sampling_rate=processor.feature_extractor.sampling_rate, return_tensors="pt").input_features
+        input_features = processor(inputs, sampling_rate=processor.feature_extractor.sampling_rate, return_tensors="pt")
 
         # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
         generation_kwargs = dict(input_features, streamer=streamer, max_new_tokens=128, language="en", task="transcribe")
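
The code changes above are renames plus one small fix: the globals written by the timing hooks become distil_runtime_pipeline / runtime_pipeline, and the trailing .input_features access is dropped, presumably so that the processor's full dict-like output can be unpacked into dict(input_features, streamer=streamer, ...) on the next line. The underlying pattern is unchanged: transcribe is a generator, and each pipeline's private _forward is monkey-patched with a wrapper that records the wall-clock time of the forward pass before the partial result is yielded back to the UI. Below is a minimal, self-contained sketch of that timing pattern, assuming a generic transformers ASR pipeline; the checkpoint and audio path are illustrative, not the Space's actual configuration.

import time

from transformers import pipeline

# Checkpoint and audio path are illustrative assumptions, not the Space's exact setup.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")

runtime_pipeline = None           # written by the timing hook below
original_forward = asr._forward   # keep a handle on the real forward pass

def _forward_time(*args, **kwargs):
    # Wrap _forward so every call records its wall-clock duration as a side effect.
    global runtime_pipeline
    start_time = time.time()
    result = original_forward(*args, **kwargs)
    runtime_pipeline = round(time.time() - start_time, 2)
    return result

asr._forward = _forward_time

text = asr("assets/example_1.wav")["text"]
print(text, runtime_pipeline)

Because transformers pipelines split work into preprocess, _forward, and postprocess, timing inside _forward measures only the model's forward pass, not feature extraction or decoding, which is what makes the distil vs. teacher comparison in this Space fair.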
assets/example_1.wav CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e938b9f81dea096ec7d3752e90afca8d370f7a461d3a08e1a559f4440ed055d
-size 1963810
+oid sha256:d96fece5c0c24d039801e9e39e9985982ad63becdab6c1a141992aa6dd37a615
+size 802110
assets/example_2.wav CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81fc0857f7fe11416ede431db713a02fdb787bbc049802fe74c791f3b44e5bf4
-size 1920044
+oid sha256:1e938b9f81dea096ec7d3752e90afca8d370f7a461d3a08e1a559f4440ed055d
+size 1963810
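
The .wav changes only swap Git LFS pointers; the audio bytes themselves live in LFS storage. A pointer's oid and size are simply the SHA-256 digest and byte count of the real file, so a checked-out copy can be sanity-checked with a few lines of Python. This is a sketch, assuming the assets have been pulled locally; the expected values are copied from the updated example_1 pointer above.

import hashlib
from pathlib import Path

def lfs_fields(path):
    # Return the (oid, size) pair a Git LFS pointer would record for this file.
    data = Path(path).read_bytes()
    return hashlib.sha256(data).hexdigest(), len(data)

oid, size = lfs_fields("assets/example_1.wav")
assert oid == "d96fece5c0c24d039801e9e39e9985982ad63becdab6c1a141992aa6dd37a615"
assert size == 802110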