sanchit-gandhi committed
Commit 6d71260 · Parent: a741e24

Update app.py

Files changed (1)
  1. app.py +25 -40
app.py CHANGED
@@ -2,6 +2,7 @@ import base64
 import math
 import os
 import time
+from functools import partial
 from multiprocessing import Pool
 
 import gradio as gr
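
Note: the new from functools import partial supports the worker-pool change further down. Pool.map passes exactly one positional argument to its worker function, so the extra batch_id argument to send_chunks has to be bound up front. A minimal sketch of the pattern, with a stand-in worker rather than the demo's real send_chunks:

from functools import partial
from multiprocessing import Pool

def send(batch, batch_id):
    # Stand-in for send_chunks: ship one batch, tagged with its request id.
    return (batch_id, batch)

if __name__ == "__main__":
    with Pool(4) as pool:
        # partial binds batch_id; Pool.map supplies each batch in turn.
        results = pool.map(partial(send, batch_id=42), range(8))
    print(results)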
@@ -24,8 +25,9 @@ To skip the queue, you may wish to create your own inference endpoint, details f
 
 article = "Whisper large-v2 model by OpenAI. Backend running JAX on a TPU v4-8 through the generous support of the [TRC](https://sites.research.google/trc/about/) programme. Whisper JAX [code](https://github.com/sanchit-gandhi/whisper-jax) and Gradio demo by 🤗 Hugging Face."
 
-API_URL = os.getenv("API_URL")
-API_URL_FROM_FEATURES = os.getenv("API_URL_FROM_FEATURES")
+API_SEND_URL = os.getenv("API_SEND_URL")
+API_FORWARD_URL = os.getenv("API_FORWARD_URL")
+
 language_names = sorted(TO_LANGUAGE_CODE.keys())
 CHUNK_LENGTH_S = 30
 BATCH_SIZE = 16
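
Note: the old API_URL / API_URL_FROM_FEATURES pair becomes a send/forward split: API_SEND_URL receives the pre-processed feature batches up front, and API_FORWARD_URL triggers generation for one batch index at a time, which is what lets the progress bar advance per batch. Both are read from the environment, so a missing variable only fails later inside requests.post; a hypothetical guard (not part of this commit) would fail fast instead:

import os

API_SEND_URL = os.getenv("API_SEND_URL")
API_FORWARD_URL = os.getenv("API_FORWARD_URL")

# Hypothetical: surface a misconfigured deployment immediately rather than
# letting query() post to None at request time.
if not (API_SEND_URL and API_FORWARD_URL):
    raise RuntimeError("API_SEND_URL and API_FORWARD_URL must both be set")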
@@ -33,48 +35,32 @@ NUM_PROC = 16
 FILE_LIMIT_MB = 1000
 
 
-def query(payload):
-    response = requests.post(API_URL, json=payload)
+def query(url, payload):
+    response = requests.post(url, json=payload)
     return response.json(), response.status_code
 
 
-def inference(inputs, task=None, return_timestamps=False):
-    payload = {"inputs": inputs, "task": task, "return_timestamps": return_timestamps}
+def inference(batch_id, idx, task=None, return_timestamps=False):
+    payload = {"batch_id": batch_id, "idx": idx, "task": task, "return_timestamps": return_timestamps}
 
-    data, status_code = query(payload)
+    data, status_code = query(API_FORWARD_URL, payload)
 
     if status_code == 200:
-        text = data["text"]
+        tokens = {"tokens": np.asarray(data["tokens"])}
+        return tokens
     else:
-        text = data["detail"]
-
-    timestamps = data.get("chunks")
-    if timestamps is not None:
-        timestamps = [
-            f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
-            for chunk in timestamps
-        ]
-        text = "\n".join(str(feature) for feature in timestamps)
-    return text
-
-
-def chunked_query(payload):
-    response = requests.post(API_URL_FROM_FEATURES, json=payload)
-    return response.json()
+        gr.Error(data["detail"])
 
 
-def forward(batch, task=None, return_timestamps=False):
+def send_chunks(batch, batch_id):
     feature_shape = batch["input_features"].shape
     batch["input_features"] = base64.b64encode(batch["input_features"].tobytes()).decode()
-    outputs = chunked_query(
-        {"batch": batch, "task": task, "return_timestamps": return_timestamps, "feature_shape": feature_shape}
-    )
-    outputs["tokens"] = np.asarray(outputs["tokens"])
-    return outputs
+    query(API_SEND_URL, {"batch": batch, "feature_shape": feature_shape, "batch_id": batch_id})
 
 
-def identity(batch):
-    return batch
+def forward(batch_id, idx, task=None, return_timestamps=False):
+    outputs = inference(batch_id, idx, task, return_timestamps)
+    return outputs
 
 
 # Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
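
Note: send_chunks serialises the numpy feature batch as raw bytes in base64 and ships feature_shape alongside it so the receiver can rebuild the array. Only the shape travels with the payload, so the dtype has to be agreed out of band. A self-contained sketch of that round trip (the decode half and the float32 dtype are assumptions about the backend, not code from this commit):

import base64
import numpy as np

features = np.random.randn(16, 80, 3000).astype(np.float32)  # dummy log-mel batch
feature_shape = features.shape

# Encode, as send_chunks does:
encoded = base64.b64encode(features.tobytes()).decode()

# Decode, as the receiving endpoint presumably does:
decoded = np.frombuffer(base64.b64decode(encoded), dtype=np.float32).reshape(feature_shape)

assert np.array_equal(features, decoded)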
@@ -108,22 +94,21 @@ if __name__ == "__main__":
 
     def tqdm_generate(inputs: dict, task: str, return_timestamps: bool, progress: gr.Progress):
         inputs_len = inputs["array"].shape[0]
-        all_chunk_start_idx = np.arange(0, inputs_len, step)
-        num_samples = len(all_chunk_start_idx)
+        all_chunk_start_batch_id = np.arange(0, inputs_len, step)
+        num_samples = len(all_chunk_start_batch_id)
         num_batches = math.ceil(num_samples / BATCH_SIZE)
-        dummy_batches = list(
-            range(num_batches)
-        )  # Gradio progress bar not compatible with generator, see https://github.com/gradio-app/gradio/issues/3841
+        dummy_batches = list(range(num_batches))
 
         dataloader = processor.preprocess_batch(inputs, chunk_length_s=CHUNK_LENGTH_S, batch_size=BATCH_SIZE)
-        progress(0, desc="Pre-processing audio file...")
-        dataloader = pool.map(identity, dataloader)
+        progress(0, desc="Sending audio to TPU...")
+        batch_id = np.random.randint(1000000)  # TODO(SG): swap to an iterator
+        pool.map(partial(send_chunks, batch_id=batch_id), dataloader)
 
         model_outputs = []
         start_time = time.time()
         # iterate over our chunked audio samples
-        for batch, _ in zip(dataloader, progress.tqdm(dummy_batches, desc="Transcribing...")):
-            model_outputs.append(forward(batch, task=task, return_timestamps=return_timestamps))
+        for idx in progress.tqdm(dummy_batches, desc="Transcribing..."):
+            model_outputs.append(forward(batch_id, idx, task=task, return_timestamps=return_timestamps))
         runtime = time.time() - start_time
 
         post_processed = processor.postprocess(model_outputs, return_timestamps=return_timestamps)
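
Note: batch_id tags one request's chunks so that concurrent users don't collide on the backend, and the author's TODO(SG) flags the weakness of np.random.randint(1000000): two sessions can draw the same id. Note also that pool.map fully consumes the preprocessing generator before the transcription loop starts, so the "Sending audio to TPU..." stage completes before the progress bar begins. One possible shape for the iterator the TODO mentions (a sketch, not the committed fix):

import itertools

# Process-wide counter: ids are unique within this frontend process.
# uuid.uuid4().hex would also avoid collisions across restarted processes.
_batch_ids = itertools.count()

def next_batch_id():
    return next(_batch_ids)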
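
Note: one review point on the error branch in inference: gr.Error is an exception class in Gradio, so calling gr.Error(data["detail"]) without raise constructs the error and discards it, and inference then falls through returning None. A sketch of how the branch would conventionally read:

import gradio as gr
import numpy as np

def handle_response(data, status_code):
    # Corrected error path for inference(): raise, don't just call, gr.Error
    # so Gradio surfaces the message in the UI.
    if status_code == 200:
        return {"tokens": np.asarray(data["tokens"])}
    raise gr.Error(data["detail"])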