Break down audio files into chunks and process each chunk

Browse files

Files changed (4) hide show

.gitignore +5 -0
handler.py +62 -13
requirements.txt +2 -0
test_handler.py +15 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+venv
+__pycache__
+.vscode
+pretrained_models
+*.mp3

handler.py CHANGED Viewed

@@ -1,24 +1,73 @@
 import logging
 from speechbrain.pretrained import EncoderClassifier
 from typing import Dict, List, Any
 class EndpointHandler:
     def __init__(self, path=""):  # pre-change version of the handler; `path` is not used in this body
-        self.model = EncoderClassifier.from_hparams("speechbrain/lang-id-voxlingua107-ecapa")
-        print('model loaded')
-        logging.info('model loaded')
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:  # returns a single result dict for the whole input
-        inputs = data.pop("inputs",data)  # falls back to the whole payload when "inputs" is absent
-        print('audio_url', inputs)
-        logging.info(f'audio_url {inputs}')
-        # run normal prediction
-        output = self.model.classify_file(inputs)  # the entire input is classified in one call
-        return {
-            "prediction": float(output[1].exp()[0]),
-            "language": output[3][0],
-        }

 import logging
 from speechbrain.pretrained import EncoderClassifier
 from typing import Dict, List, Any
+import requests
+from pydub import AudioSegment
+from io import BytesIO
+import tempfile
+import os
+def save_chunks_to_temp_files(url, chunk_length=10000, timeout=30):
+    """Download an audio file and split it into MP3 chunks saved as temp files.
+
+    Args:
+        url: Location of the audio file to download.
+        chunk_length: Length of each chunk in milliseconds; must be positive.
+            The last chunk may be shorter than this.
+        timeout: Seconds to wait on the server before giving up, so that an
+            unresponsive host cannot block the caller indefinitely.
+
+    Returns:
+        A list of temporary MP3 file paths, one per chunk, in order. The
+        caller is responsible for deleting them.
+
+    Raises:
+        ValueError: If chunk_length is not positive, or the response does not
+            declare an audio content type.
+        requests.HTTPError: If the download returns an error status.
+        requests.Timeout: If the server does not respond within `timeout`.
+    """
+    if chunk_length <= 0:
+        raise ValueError("chunk_length must be a positive number of milliseconds")
+    # Download the audio file from the URL
+    response = requests.get(url, timeout=timeout)
+    response.raise_for_status()
+    # Ensure the content type is audio; a missing header counts as "not audio"
+    # instead of surfacing as a KeyError.
+    if "audio" not in response.headers.get("Content-Type", ""):
+        raise ValueError("URL does not seem to be an audio file")
+    # Load the downloaded bytes into an AudioSegment via a file-like object
+    audio_segment = AudioSegment.from_file(BytesIO(response.content))
+    # Save each chunk_length-millisecond slice to its own temporary file
+    temp_files = []
+    try:
+        for idx, start in enumerate(range(0, len(audio_segment), chunk_length)):
+            # Reserve the path and close our handle before exporting: writing
+            # to a temp file by name while it is still open fails on Windows.
+            fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.mp3")
+            os.close(fd)
+            temp_files.append(chunk_path)
+            chunk = audio_segment[start : start + chunk_length]
+            chunk.export(chunk_path, format="mp3")
+    except Exception:
+        # Do not leave partial output behind when a chunk fails to export.
+        for chunk_path in temp_files:
+            try:
+                os.remove(chunk_path)
+            except OSError:
+                pass
+        raise
+    return temp_files
 class EndpointHandler:
+    """Endpoint handler that classifies the language of an audio URL chunk by chunk."""
     def __init__(self, path=""):
+        """Load the pretrained classifier; `path` is accepted but not used here."""
+        self.model = EncoderClassifier.from_hparams(
+            "speechbrain/lang-id-voxlingua107-ecapa"
+        )
+        print("model loaded")
+        logging.info("model loaded")
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Classify every chunk of the audio referenced by the request payload.
+
+        Args:
+            data: Request payload. The "inputs" entry is removed from it and
+                used as the audio URL; when that key is absent the payload
+                itself is used in its place.
+
+        Returns:
+            One dict per audio chunk, in order, with keys "prediction" (a
+            float) and "language".
+            NOTE(review): "prediction" is presumably the model's confidence
+            and "language" its predicted label - confirm against the
+            SpeechBrain `classify_file` return value.
+        """
+        url = data.pop("inputs", data)
+        print("audio_url", url)
+        logging.info(f"audio_url {url}")
+        temp_filepaths = save_chunks_to_temp_files(url)
+        response = []
+        try:
+            # Count from 1 so the progress log reads "1 / N" ... "N / N".
+            for i, path in enumerate(temp_filepaths, start=1):
+                logging.info(f"processing chunk {i} / {len(temp_filepaths)}")
+                output = self.model.classify_file(path)
+                response.append(
+                    {
+                        "prediction": float(output[1].exp()[0]),
+                        "language": output[3][0],
+                    }
+                )
+        finally:
+            # Always delete the chunk files, including those never reached
+            # because classification of an earlier chunk raised.
+            for path in temp_filepaths:
+                try:
+                    os.remove(path)
+                except OSError:
+                    logging.warning(f"could not remove temp file {path}")
+        return response

requirements.txt CHANGED Viewed

	@@ -1 +1,3 @@
1	speechbrain

 speechbrain
+pydub
+requests

test_handler.py ADDED Viewed

	@@ -0,0 +1,15 @@

+# Manual smoke test: runs the handler on a sample audio URL and prints the result.
+from handler import EndpointHandler
+# init handler
+my_handler = EndpointHandler()
+# prepare sample payload
+holiday_payload = {
+    "inputs": "https://pl-bots-public-media.s3.amazonaws.com/5511976170855_daa87950-5e1b-49e0-9daf-ba73d568a291.mp3"
+}
+# test the handler; keep the result in its own name so the payload is not overwritten
+predictions = my_handler(holiday_payload)
+# show results
+print("predictions", predictions)