Spaces:

Gigaverse
/

ivrit-ai-streaming

Sleeping

App Files Files Community

AshDavid12 commited on Sep 10, 2024

Commit

47058ca

1 Parent(s): 6af664b

trying ivrit model

Browse files

Files changed (2) hide show

infer.py +80 -19
requirements.txt +0 -2

infer.py CHANGED Viewed

@@ -1,33 +1,94 @@
 import faster_whisper
 import requests
-from pydub import AudioSegment
-import io
 # Load the faster-whisper model that supports Hebrew
 model = faster_whisper.WhisperModel("ivrit-ai/faster-whisper-v2-d4")
-# URL of the mp3 audio file
-audio_url = "https://github.com/metaldaniel/HebrewASR-Comparison/raw/main/HaTankistiot_n12-mp3.mp3"
-# Download the mp3 audio file from the URL
 response = requests.get(audio_url)
 if response.status_code != 200:
     raise Exception("Failed to download audio file")
-# Load the mp3 audio into an in-memory buffer
-mp3_audio = io.BytesIO(response.content)
-# Convert the mp3 audio to wav using pydub (in memory)
-audio = AudioSegment.from_file(mp3_audio, format="mp3")
-wav_audio = io.BytesIO()
-audio.export(wav_audio, format="wav")
-wav_audio.seek(0)  # Reset the pointer to the beginning of the buffer
-# Save the in-memory wav audio to a temporary file-like object
-with io.BytesIO(wav_audio.read()) as temp_wav_file:
-    # Perform the transcription
-    segments, info = model.transcribe(temp_wav_file, language="he")
-    # Print transcription results
-    for segment in segments:
-        print(f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}")

 import faster_whisper
 import requests
+import tempfile
+import os
 # Load the faster-whisper model that supports Hebrew
 model = faster_whisper.WhisperModel("ivrit-ai/faster-whisper-v2-d4")
+# URL of the audio file (replace this with the actual URL of your audio)
+audio_url = "https://github.com/AshDavid12/runpod-serverless-forked/blob/main/me-hebrew.wav"
+# Download the audio file from the URL
 response = requests.get(audio_url)
 if response.status_code != 200:
     raise Exception("Failed to download audio file")
+# Create a temporary file to store the audio
+with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_file:
+    tmp_audio_file.write(response.content)
+    tmp_audio_file_path = tmp_audio_file.name
+# Perform the transcription
+segments, info = model.transcribe(tmp_audio_file_path, language="he")
+# Print transcription results
+for segment in segments:
+    print(f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}")
+# Clean up the temporary file
+os.remove(tmp_audio_file_path)
+# import torch
+# from transformers import WhisperProcessor, WhisperForConditionalGeneration
+# import requests
+# import soundfile as sf
+# import io
+# # Load the Whisper model and processor from Hugging Face Model Hub
+# model_name = "openai/whisper-base"
+# processor = WhisperProcessor.from_pretrained(model_name)
+# model = WhisperForConditionalGeneration.from_pretrained(model_name)
+#
+# # Use GPU if available, otherwise use CPU
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+# model.to(device)
+#
+# # URL of the audio file
+# audio_url = "https://www.signalogic.com/melp/EngSamples/Orig/male.wav"
+#
+# # Download the audio file
+# response = requests.get(audio_url)
+# audio_data = io.BytesIO(response.content)
+#
+# # Read the audio using soundfile
+# audio_input, _ = sf.read(audio_data)
+#
+# # Preprocess the audio for Whisper
+# inputs = processor(audio_input, return_tensors="pt", sampling_rate=16000)
+# attention_mask = inputs['input_features'].ne(processor.tokenizer.pad_token_id).long()
+#
+# # Move inputs and attention mask to the correct device
+# inputs = {key: value.to(device) for key, value in inputs.items()}
+# attention_mask = attention_mask.to(device)
+#
+# # Generate the transcription with attention mask
+# with torch.no_grad():
+#     predicted_ids = model.generate(
+#         inputs["input_features"],
+#         attention_mask=attention_mask  # Pass attention mask explicitly
+#     )
+# # Decode the transcription
+# transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+#
+# # Print the transcription result
+# print("Transcription:", transcription)

requirements.txt CHANGED Viewed

@@ -4,6 +4,4 @@ requests
 transformers
 soundfile
 faster-whisper
-pydub
-ffmpeg

 transformers
 soundfile
 faster-whisper