Spaces:
Sleeping
Sleeping
File size: 5,841 Bytes
ebaaf9b abaf442 1c789c0 d7b2452 abaf442 ebaaf9b f1bf1b3 a94388a 8004cea 9b933e3 8004cea ebaaf9b f4a3257 1c789c0 f4a3257 8004cea d7b2452 1c789c0 d82fb72 1c789c0 f4a3257 d7b2452 1ab0cdf f4a3257 d7b2452 1c789c0 f4a3257 d7b2452 f4a3257 abaf442 7456852 f4a3257 b85baaf 5a62402 f4a3257 a9516a4 f4a3257 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import asyncio
import json
import logging
import wave
import websockets
import requests
import ssl
import sys
# Emit INFO-and-above records on stdout. force=True makes this configuration
# replace any handlers that were already attached to the root logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s: %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
    force=True,
)
logger = logging.getLogger(__name__)

# Remote location of the sample WAV recording.
# NOTE(review): in the visible source this is only referenced from
# commented-out download code -- confirm whether it is still needed.
AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"
from pydub import AudioSegment
# Convert the audio to mono and resample it to 16 kHz
def convert_to_mono_16k(audio_file_path):
    """Load a WAV file and return it as a mono, 16 kHz ``AudioSegment``.

    Args:
        audio_file_path: Source audio, passed straight to
            ``AudioSegment.from_file`` with ``format="wav"``.

    Returns:
        The loaded segment after ``set_channels(1)`` and
        ``set_frame_rate(16000)``. No sample-width conversion is applied here.

    Raises:
        Exception: Whatever loading or converting raises; the error is logged
            and then re-raised unchanged.
    """
    logger.info(
        "Starting audio conversion to mono and resampling to 16kHz for file: %s",
        audio_file_path,
    )
    try:
        # Load the audio file into an AudioSegment object
        audio_segment = AudioSegment.from_file(audio_file_path, format="wav")
        # Convert the audio to mono and resample it to 16kHz
        audio_segment = audio_segment.set_channels(1).set_frame_rate(16000)
    except Exception as e:
        logger.error("Error during audio conversion: %s", e)
        # Bare raise keeps the original traceback intact for the caller.
        raise
    logger.info("Audio conversion to mono and 16kHz completed successfully.")
    return audio_segment
async def send_audio(websocket):
    """Stream the converted ``test_copy.wav`` audio over ``websocket``.

    First sends one JSON text message describing the PCM format
    (``sample_rate``, ``channels``, ``sampwidth``), then the raw PCM bytes in
    16 KB messages with a 10 ms pause after each one.

    Args:
        websocket: Open connection object exposing an awaitable ``send()``.

    Returns:
        None. If the conversion fails, the failure is logged and nothing is
        sent. If sending a chunk fails, the failure is logged and the
        remaining chunks are dropped.
    """
    buffer_size = 1024 * 16  # Send smaller chunks (16KB) for real-time processing

    logger.info("Converting the audio to mono and 16kHz.")
    try:
        converted_audio = convert_to_mono_16k('test_copy.wav')
    except Exception as e:
        logger.error("Failed to convert audio: %s", e)
        return

    # Describe the stream from the converted segment itself so the metadata
    # cannot disagree with the bytes that follow. The conversion only changes
    # channel count and frame rate, so the sample width is whatever the
    # source file used and is not necessarily 2 bytes.
    metadata = {
        'sample_rate': converted_audio.frame_rate,
        'channels': converted_audio.channels,
        'sampwidth': converted_audio.sample_width,
    }
    await websocket.send(json.dumps(metadata))
    logger.info("Sent metadata: %s", metadata)

    try:
        raw_data = converted_audio.raw_data
        logger.info(
            "Starting to send raw PCM audio data. Total data size: %d bytes.",
            len(raw_data),
        )
        for offset in range(0, len(raw_data), buffer_size):
            pcm_chunk = raw_data[offset:offset + buffer_size]
            await websocket.send(pcm_chunk)  # Send raw PCM data chunk
            logger.info("Sent PCM chunk of size %d bytes.", len(pcm_chunk))
            await asyncio.sleep(0.01)  # Simulate real-time sending
        logger.info("Completed sending all audio data.")
    except Exception as e:
        # Best effort: a send failure ends the upload but must not take down
        # the tasks running alongside this coroutine.
        logger.error("Error while sending audio data: %s", e)
# Download the WAV file locally
# with requests.get(AUDIO_FILE_URL, stream=True) as response:
# if response.status_code == 200:
# with open('downloaded_audio.wav', 'wb') as f:
# for chunk in response.iter_content(chunk_size=1024):
# f.write(chunk)
# print("Audio file downloaded successfully.")
# Open the downloaded WAV file and extract PCM data
# with wave.open('test_copy.wav', 'rb') as wav_file:
# metadata = {
# 'sample_rate': wav_file.getframerate(),
# 'channels': wav_file.getnchannels(),
# 'sampwidth': wav_file.getsampwidth(),
# }
#
# # Send metadata to the server before sending the audio
# await websocket.send(json.dumps(metadata))
# print(f"Sent metadata: {metadata}")
# # Send the PCM audio data in chunks
# while True:
# pcm_chunk = wav_file.readframes(buffer_size)
# if not pcm_chunk:
# break # End of file
#
# await websocket.send(pcm_chunk) # Send raw PCM data chunk
# #print(f"Sent PCM chunk of size {len(pcm_chunk)} bytes.")
# await asyncio.sleep(0.01) # Simulate real-time sending
# else:
# print(f"Failed to download audio file. Status code: {response.status_code}")
async def receive_transcription(websocket):
    """Print every message received on ``websocket`` until the connection ends.

    Each message is printed verbatim and then checked to be valid JSON. A
    message that is not valid JSON is reported and skipped instead of
    terminating the receiver.

    Args:
        websocket: Open connection object exposing an awaitable ``recv()``.

    Returns:
        None, once the connection is closed or ``recv()`` fails.
    """
    while True:
        try:
            message = await websocket.recv()  # Receive transcription from the server
        except websockets.ConnectionClosed:
            # A closed connection is the normal way this loop ends; report it
            # the same way the heartbeat task does rather than as an error.
            print("Connection closed, stopping receiver")
            break
        except Exception as e:
            print(f"Error receiving transcription: {e}")
            break

        print(f"Transcription: {message}")

        # Validate the payload. The parsed value is currently unused.
        # NOTE: handling of an optional 'download_url' field (downloading the
        # referenced file to downloaded_audio.wav) was previously sketched
        # here and is disabled.
        try:
            json.loads(message)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Ignoring transcription message that is not valid JSON: {e}")
async def send_heartbeat(websocket):
    """Ping ``websocket`` every 30 seconds until the connection is closed.

    Args:
        websocket: Open connection object exposing an awaitable ``ping()``.

    Returns:
        None, once ``ping()`` raises ``websockets.ConnectionClosed``.
    """
    pause_seconds = 30  # Send ping every 30 seconds (adjust as needed)
    while True:
        try:
            await websocket.ping()
        except websockets.ConnectionClosed:
            print("Connection closed, stopping heartbeat")
            return
        else:
            print("Sent keepalive ping")
        await asyncio.sleep(pause_seconds)
async def run_client(verify_tls=True):
    """Connect to the transcription endpoint and run the client tasks.

    Opens the WebSocket, then runs the audio sender and the transcription
    receiver concurrently while a heartbeat task pings in the background.
    The heartbeat is cancelled as soon as sender and receiver have both
    finished, so the client does not linger until the next ping is due.

    Args:
        verify_tls: When true (the default) the server certificate and
            hostname are verified against the default trust store. Pass
            ``False`` only for test servers with self-signed certificates;
            it makes the connection vulnerable to interception.

    Raises:
        Exception: Connection errors and any exception escaping one of the
            tasks propagate to the caller.
    """
    uri = "wss://gigaverse-ivrit-ai-streaming.hf.space/wtranscribe"  # WebSocket URL

    ssl_context = ssl.create_default_context()
    if not verify_tls:
        # check_hostname must be switched off before verify_mode can be
        # set to CERT_NONE.
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE

    # ``close_timeout`` is the current name of the option formerly spelled
    # ``timeout``; the old spelling is deprecated in the websockets library.
    async with websockets.connect(uri, ssl=ssl_context, close_timeout=60) as websocket:
        heartbeat_task = asyncio.create_task(send_heartbeat(websocket))
        try:
            await asyncio.gather(
                send_audio(websocket),
                receive_transcription(websocket),
            )
        finally:
            # Stop pinging once the session is over. Awaiting the task
            # afterwards surfaces any error it hit other than cancellation.
            heartbeat_task.cancel()
            try:
                await heartbeat_task
            except asyncio.CancelledError:
                pass
# Start the client only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    asyncio.run(run_client())