Spaces:

Gigaverse
/

ivrit-ai-streaming

Sleeping

App Files Files Community

AshDavid12 committed on Sep 9, 2024

Commit

9bd82d6

1 Parent(s): 1bd5368

origin infer wo runpod

Browse files

Files changed (3) hide show

Dockerfile +14 -59
infer.py +41 -141
whisper_online.py +7 -4

Dockerfile CHANGED Viewed

@@ -1,65 +1,20 @@
-FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
-ENV PYTHON_VERSION=3.11
-RUN export DEBIAN_FRONTEND=noninteractive \
-    && apt-get -qq update \
-    && apt-get -qq install --no-install-recommends \
-    python${PYTHON_VERSION} \
-    python${PYTHON_VERSION}-venv \
-    python3-pip \
-    libcublas11 \
-    && rm -rf /var/lib/apt/lists/*
-# Set up Python environment
-RUN python3 -m pip install --upgrade pip
-# Copy the requirements file and install Python packages
-COPY requirements.txt .
-RUN pip install -r requirements.txt
-# Install the specific model using faster-whisper
-#RUN python3 -c 'import faster_whisper; m = faster_whisper.WhisperModel("ivrit-ai/faster-whisper-v2-d3-e3")'
-# Set the SENTENCE_TRANSFORMERS_HOME environment variable to a writable directory
-# Set environment variables for cache directories
-ENV SENTENCE_TRANSFORMERS_HOME="/tmp/.cache/sentence_transformers"
-ENV HF_HOME="/tmp/.cache/huggingface"
-# Ensure the cache directories exist
-RUN mkdir -p $SENTENCE_TRANSFORMERS_HOME $HF_HOME
-# Add your Python scripts
-COPY infer.py .
-COPY whisper_online.py .
-EXPOSE 7860
-# Run the infer.py script when the container starts
-CMD ["python3", "-u", "/infer.py"]
-# Include Python
-#from python:3.11.1-buster
-#
-## Define your working directory
-#WORKDIR /
-#
-## Install runpod
-#COPY requirements.txt .
-#RUN pip install -r requirements.txt
-#
-#RUN python3 -c 'import faster_whisper; m = faster_whisper.WhisperModel("ivrit-ai/faster-whisper-v2-d3-e3")'
-#
-## Add your file
-#ADD infer.py .
-#ADD whisper_online.py .
-#
-#ENV LD_LIBRARY_PATH="/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib:/usr/local/lib/python3.11/site-packages/nvidia/cublas/lib"
-#
-## Call your file when your container starts
-#CMD [ "python", "-u", "/infer.py" ]

+# Include Python
+from python:3.11.1-buster
+# Define your working directory
+WORKDIR /
+# Install runpod
+RUN pip install runpod
+RUN pip install torch==2.3.1
+RUN pip install faster-whisper
+RUN python3 -c 'import faster_whisper; m = faster_whisper.WhisperModel("ivrit-ai/faster-whisper-v2-d4")'
+# Add your file
+ADD infer.py .
+ENV LD_LIBRARY_PATH="/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib:/usr/local/lib/python3.11/site-packages/nvidia/cublas/lib"
+# Call your file when your container starts
+CMD [ "python", "-u", "/infer.py" ]

infer.py CHANGED Viewed

@@ -1,5 +1,3 @@
-import runpod
 import base64
 import faster_whisper
 import tempfile
@@ -11,195 +9,97 @@ import os
 import whisper_online
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)])
-# Try to import the module
-try:
-    logging.info("attempting to load whisper online")
-    from whisper_online import *  # Replace 'some_module' with the actual module name
-    logging.info("Successfully imported whisper_online.")
-except ImportError as e:
-    logging.error(f"Failed to import whisper_online: {e}", exc_info=True)
-except Exception as e:
-    logging.error(f"Unknown from exception- error to import whisper_online: {e}", exc_info=True)
-if torch.cuda.is_available():
-    logging.info(f"CUDA is available.")
-else:
-    logging.info("CUDA is not available. Using CPU.")
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 model_name = 'ivrit-ai/faster-whisper-v2-d3-e3'
-logging.info(f"Selected model name: {model_name}")
-#model = faster_whisper.WhisperModel(model_name, device=device)
 try:
     lan = 'he'
     logging.info(f"Attempting to initialize FasterWhisperASR with device: {device}")
-    logging.info(f"Cache directory before: {tempfile.gettempdir()}")  # Log the temp directory
-    cache_dir = os.environ.get('XDG_CACHE_HOME', tempfile.gettempdir())
-    logging.info(f"Cache directory after: {tempfile.gettempdir()}")  # Log the temp directory
-    model = whisper_online.FasterWhisperASR(lan=lan, modelsize=model_name, cache_dir=cache_dir, model_dir=None)
     logging.info("FasterWhisperASR model initialized successfully.")
 except Exception as e:
-    logging.error(f"Falied to inilialize faster whisper model {e}")
 # Maximum data size: 200MB
 MAX_PAYLOAD_SIZE = 200 * 1024 * 1024
 def download_file(url, max_size_bytes, output_filename, api_key=None):
-    """
-    Download a file from a given URL with size limit and optional API key.
-    Args:
-    url (str): The URL of the file to download.
-    max_size_bytes (int): Maximum allowed file size in bytes.
-    output_filename (str): The name of the file to save the download as.
-    api_key (str, optional): API key to be used as a bearer token.
-    Returns:
-    bool: True if download was successful, False otherwise.
-    """
     try:
-        # Prepare headers
         headers = {}
         if api_key:
             headers['Authorization'] = f'Bearer {api_key}'
-        # Send a GET request
         response = requests.get(url, stream=True, headers=headers)
-        response.raise_for_status()  # Raises an HTTPError for bad requests
-        # Get the file size if possible
         file_size = int(response.headers.get('Content-Length', 0))
         if file_size > max_size_bytes:
-            print(f"File size ({file_size} bytes) exceeds the maximum allowed size ({max_size_bytes} bytes).")
             return False
-        # Download and write the file
         downloaded_size = 0
         with open(output_filename, 'wb') as file:
             for chunk in response.iter_content(chunk_size=8192):
                 downloaded_size += len(chunk)
                 if downloaded_size > max_size_bytes:
-                    print(f"Download stopped: Size limit exceeded ({max_size_bytes} bytes).")
                     return False
                 file.write(chunk)
         print(f"File downloaded successfully: {output_filename}")
         return True
     except requests.RequestException as e:
         print(f"Error downloading file: {e}")
         return False
-def transcribe(job):
-    datatype = job['input'].get('type', None)
-    if not datatype:
-        return {"error": "datatype field not provided. Should be 'blob' or 'url'."}
-    if not datatype in ['blob', 'url']:
-        return {"error": f"datatype should be 'blob' or 'url', but is {datatype} instead."}
-    # Get the API key from the job input
-    api_key = job['input'].get('api_key', None)
-    with tempfile.TemporaryDirectory() as d:
-        audio_file = f'{d}/audio.mp3'
-        if datatype == 'blob':
-            mp3_bytes = base64.b64decode(job['input']['data'])
-            open(audio_file, 'wb').write(mp3_bytes)
-        elif datatype == 'url':
-            success = download_file(job['input']['url'], MAX_PAYLOAD_SIZE, audio_file, api_key)
-            if not success:
-                return {"error": f"Error downloading data from {job['input']['url']}"}
-        result = transcribe_core(audio_file)
-        return {'result': result}
-def transcribe_core(audio_file):
-    print('Transcribing...')
     ret = {'segments': []}
-    segs, dummy = model.transcribe(audio_file, language='he', word_timestamps=True)
-    for s in segs:
-        words = []
-        for w in s.words:
-            words.append({'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability})
-        seg = {'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text, 'avg_logprob': s.avg_logprob,
-               'compression_ratio': s.compression_ratio, 'no_speech_prob': s.no_speech_prob, 'words': words}
-        print(seg)
-        ret['segments'].append(seg)
     return ret
-#runpod.serverless.start({"handler": transcribe})
 def transcribe_whisper(job):
-    logging.info(f"in triscribe-whisper")
-    datatype = job['input'].get('type', None)
     if not datatype:
         return {"error": "datatype field not provided. Should be 'blob' or 'url'."}
-    if not datatype in ['blob', 'url']:
-        return {"error": f"datatype should be 'blob' or 'url', but is {datatype} instead."}
-    # Get the API key from the job input
-    api_key = job['input'].get('api_key', None)
     with tempfile.TemporaryDirectory() as d:
         audio_file = f'{d}/audio.mp3'
         if datatype == 'blob':
             mp3_bytes = base64.b64decode(job['input']['data'])
-            open(audio_file, 'wb').write(mp3_bytes)
         elif datatype == 'url':
             success = download_file(job['input']['url'], MAX_PAYLOAD_SIZE, audio_file, api_key)
             if not success:
-                return {"error": f"Error downloading data from {job['input']['url']}"}
-        logging.info("Starting transcription process using transcribe_core_whisper.")
         result = transcribe_core_whisper(audio_file)
-        logging.info(f"DONE: in triscribe-whisper")
         return {'result': result}
-def transcribe_core_whisper(audio_file):
-    print('Transcribing...')
-    ret = {'segments': []}
-    try:
-        logging.debug(f"Transcribing audio file: {audio_file}")
-        segs = model.transcribe(audio_file, init_prompt="")
-        logging.info("Transcription completed successfully.")
-        for s in segs:
-            words = []
-            for w in s.words:
-                words.append({'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability})
-            seg = {'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text, 'avg_logprob': s.avg_logprob,
-                   'compression_ratio': s.compression_ratio, 'no_speech_prob': s.no_speech_prob, 'words': words}
-            logging.debug(f"All segments processed. Final transcription result: {ret}")
-            print(seg)
-            ret['segments'].append(seg)
-    except Exception as e:
-        # Log any exception that occurs during the transcription process
-        logging.error(f"Error during transcribe_core_whisper: {e}", exc_info=True)
-        return {"error": str(e)}
-    # Return the final result
-    logging.info("Transcription core function completed.")
-    return ret
-#runpod.serverless.start({"handler": transcribe_whisper})

 import base64
 import faster_whisper
 import tempfile
 import whisper_online
+# Set up logging
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)])
+# Load the FasterWhisper model
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 model_name = 'ivrit-ai/faster-whisper-v2-d3-e3'
 try:
     lan = 'he'
     logging.info(f"Attempting to initialize FasterWhisperASR with device: {device}")
+    model = whisper_online.FasterWhisperASR(lan=lan, modelsize=model_name)
     logging.info("FasterWhisperASR model initialized successfully.")
 except Exception as e:
+    logging.error(f"Failed to initialize FasterWhisperASR model: {e}")
 # Maximum data size: 200MB
 MAX_PAYLOAD_SIZE = 200 * 1024 * 1024
 def download_file(url, max_size_bytes, output_filename, api_key=None):
+    """Download a file from a given URL with size limit and optional API key."""
     try:
         headers = {}
         if api_key:
             headers['Authorization'] = f'Bearer {api_key}'
         response = requests.get(url, stream=True, headers=headers)
+        response.raise_for_status()
         file_size = int(response.headers.get('Content-Length', 0))
         if file_size > max_size_bytes:
+            print(f"File size exceeds the limit: {file_size} bytes.")
             return False
         downloaded_size = 0
         with open(output_filename, 'wb') as file:
             for chunk in response.iter_content(chunk_size=8192):
                 downloaded_size += len(chunk)
                 if downloaded_size > max_size_bytes:
+                    print(f"Download stopped: size limit exceeded.")
                     return False
                 file.write(chunk)
         print(f"File downloaded successfully: {output_filename}")
         return True
     except requests.RequestException as e:
         print(f"Error downloading file: {e}")
         return False
+def transcribe_core_whisper(audio_file):
+    """Transcribe the audio file using FasterWhisper."""
+    logging.info(f"Transcribing audio file: {audio_file}")
     ret = {'segments': []}
+    try:
+        segs, dummy = model.transcribe(audio_file, language='he', word_timestamps=True)
+        for s in segs:
+            words = [{'start': w.start, 'end': w.end, 'word': w.word, 'probability': w.probability} for w in s.words]
+            seg = {'id': s.id, 'seek': s.seek, 'start': s.start, 'end': s.end, 'text': s.text, 'avg_logprob': s.avg_logprob,
+                   'compression_ratio': s.compression_ratio, 'no_speech_prob': s.no_speech_prob, 'words': words}
+            ret['segments'].append(seg)
+        logging.info("Transcription completed successfully.")
+    except Exception as e:
+        logging.error(f"Error during transcription: {e}", exc_info=True)
     return ret
 def transcribe_whisper(job):
+    """Main transcription handler."""
+    logging.info(f"Processing job: {job}")
+    datatype = job.get('input', {}).get('type')
     if not datatype:
         return {"error": "datatype field not provided. Should be 'blob' or 'url'."}
+    if datatype not in ['blob', 'url']:
+        return {"error": f"Invalid datatype: {datatype}."}
+    api_key = job.get('input', {}).get('api_key')
     with tempfile.TemporaryDirectory() as d:
         audio_file = f'{d}/audio.mp3'
         if datatype == 'blob':
             mp3_bytes = base64.b64decode(job['input']['data'])
+            with open(audio_file, 'wb') as f:
+                f.write(mp3_bytes)
         elif datatype == 'url':
             success = download_file(job['input']['url'], MAX_PAYLOAD_SIZE, audio_file, api_key)
             if not success:
+                return {"error": f"Failed to download from {job['input']['url']}"}
         result = transcribe_core_whisper(audio_file)
         return {'result': result}
+# Example job input to test locally
+if __name__ == "__main__":
+    test_job = {
+        "input": {
+            "type": "url",
+            "url": "https://github.com/metaldaniel/HebrewASR-Comparison/raw/main/HaTankistiot_n12-mp3.mp3",
+        }
+    }
+    print(transcribe_whisper(test_job))

whisper_online.py CHANGED Viewed

@@ -105,15 +105,15 @@ class FasterWhisperASR(ASRBase):
     sep = ""
-    def load_model(self, modelsize=None, cache_dir=None, model_dir=None):
         from faster_whisper import WhisperModel
         #        logging.getLogger("faster_whisper").setLevel(logger.level)
         logging.info("Starting model loading process...")
-        logging.debug(f"Model loading parameters - modelsize: {modelsize}, cache_dir: {cache_dir}, model_dir: {model_dir}")
         if model_dir is not None:
-            logger.debug(
                 f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.")
             model_size_or_path = model_dir
         elif modelsize is not None:
@@ -123,7 +123,10 @@ class FasterWhisperASR(ASRBase):
         try:
             logging.info(f"Loading WhisperModel on device: ")
-            logging.info(f"Cache directory in online: {tempfile.gettempdir()}")  # Log the temp directory
             model = WhisperModel(model_size_or_path, device="cuda", compute_type="float16", download_root=cache_dir)
             logging.info("Model loaded successfully.")
         except Exception as e:

     sep = ""
+    def load_model(self, modelsize=None, cache_dir="/tmp/.cache/huggingface", model_dir=None):
         from faster_whisper import WhisperModel
         #        logging.getLogger("faster_whisper").setLevel(logger.level)
         logging.info("Starting model loading process...")
+        logging.info(f"Model loading parameters - modelsize: {modelsize}, cache_dir: {cache_dir}, model_dir: {model_dir}")
         if model_dir is not None:
+            logger.info(
                 f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.")
             model_size_or_path = model_dir
         elif modelsize is not None:
         try:
             logging.info(f"Loading WhisperModel on device: ")
+            os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/tmp/.cache/sentence_transformers'
+            os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
+            # Ensure the cache directory exists
+            os.makedirs(cache_dir, exist_ok=True)
             model = WhisperModel(model_size_or_path, device="cuda", compute_type="float16", download_root=cache_dir)
             logging.info("Model loaded successfully.")
         except Exception as e: