Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -47,8 +47,8 @@ AVAILABLE_SPEAKERS = {
|
|
47 |
audio_update_event = asyncio.Event()
|
48 |
acc_cosy_audio = None
|
49 |
# cosy voice tts related;
|
50 |
-
|
51 |
-
TTS_SOCKET_SERVER = "http://astarwiz.com:9444"
|
52 |
|
53 |
sio = socketio.AsyncClient()
|
54 |
|
@@ -319,7 +319,24 @@ async def inference_via_llm_api(input_text, min_new_tokens=2, max_new_tokens=64)
|
|
319 |
if "choices" in result:
|
320 |
return result["choices"][0]['text'].strip()
|
321 |
return "The system got some error during vLLM generation. Please try it again."
|
322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, target_speaker=None, progress_tracker=None):
|
324 |
global transcription_update, translation_update, audio_update, acc_cosy_audio,audio_update_event
|
325 |
transcription_update = {"content": "", "new": True}
|
@@ -370,14 +387,25 @@ async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None
|
|
370 |
server_url = TTS_SOCKET_SERVER
|
371 |
await sio.connect(server_url)
|
372 |
print(f"Connected to {server_url}")
|
373 |
-
|
374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
# use default voice
|
376 |
tts_request = {
|
377 |
'text': transcription,
|
378 |
'overwrite_prompt': False,
|
379 |
'promptText':"",
|
380 |
-
'promptAudio':
|
381 |
'sourceLang':source_lang,
|
382 |
'targetLang':target_lang
|
383 |
}
|
@@ -636,16 +664,13 @@ with gr.Blocks() as demo:
|
|
636 |
async (audioFilePath) => {
|
637 |
// Debug: Log received audio file path
|
638 |
console.log("Received audio file path:", audioFilePath);
|
639 |
-
|
640 |
if (!window.audioQueue) {
|
641 |
window.audioQueue = [];
|
642 |
window.isPlaying = false;
|
643 |
}
|
644 |
-
|
645 |
// Ensure the correct URL for the audio file is available
|
646 |
if (audioFilePath && audioFilePath.url) {
|
647 |
console.log("Processing audio file...");
|
648 |
-
|
649 |
try {
|
650 |
// Fetch and decode the audio file
|
651 |
const response = await fetch(audioFilePath.url);
|
@@ -653,64 +678,51 @@ with gr.Blocks() as demo:
|
|
653 |
console.error("Failed to fetch audio file:", response.statusText);
|
654 |
return;
|
655 |
}
|
656 |
-
|
657 |
const audioData = await response.arrayBuffer();
|
658 |
const audioContext = new AudioContext();
|
659 |
const decodedData = await audioContext.decodeAudioData(audioData);
|
660 |
-
|
661 |
// Split the decoded audio buffer into two chunks
|
662 |
const totalDuration = decodedData.duration;
|
663 |
const midPoint = Math.floor(decodedData.length / 2); // Midpoint for splitting
|
664 |
const sampleRate = decodedData.sampleRate;
|
665 |
-
|
666 |
// Create two separate AudioBuffers for each chunk
|
667 |
const firstHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, midPoint, sampleRate);
|
668 |
const secondHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, decodedData.length - midPoint, sampleRate);
|
669 |
-
|
670 |
// Copy data from original buffer to the two new buffers
|
671 |
for (let channel = 0; channel < decodedData.numberOfChannels; channel++) {
|
672 |
firstHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(0, midPoint), channel, 0);
|
673 |
secondHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(midPoint), channel, 0);
|
674 |
}
|
675 |
-
|
676 |
// Add both chunks to the queue
|
677 |
window.audioQueue.push(firstHalfBuffer);
|
678 |
window.audioQueue.push(secondHalfBuffer);
|
679 |
console.log("Two audio chunks added to queue. Queue length:", window.audioQueue.length);
|
680 |
-
|
681 |
// Function to play the next audio chunk from the queue
|
682 |
const playNextChunk = async () => {
|
683 |
console.log("Attempting to play next chunk. isPlaying:", window.isPlaying);
|
684 |
-
|
685 |
if (!window.isPlaying && window.audioQueue.length > 0) {
|
686 |
console.log("Starting playback...");
|
687 |
window.isPlaying = true;
|
688 |
-
|
689 |
// Get the next audio buffer from the queue
|
690 |
const audioBuffer = window.audioQueue.shift();
|
691 |
console.log("Playing audio chunk from buffer.");
|
692 |
-
|
693 |
const source = audioContext.createBufferSource();
|
694 |
source.buffer = audioBuffer;
|
695 |
source.connect(audioContext.destination);
|
696 |
-
|
697 |
// When the audio finishes playing, play the next chunk
|
698 |
source.onended = () => {
|
699 |
console.log("Audio chunk finished playing.");
|
700 |
window.isPlaying = false;
|
701 |
playNextChunk(); // Play the next audio chunk in the queue
|
702 |
};
|
703 |
-
|
704 |
source.start(0); // Start playing the current chunk
|
705 |
console.log("Audio chunk started.");
|
706 |
} else {
|
707 |
console.log("Already playing or queue is empty.");
|
708 |
}
|
709 |
};
|
710 |
-
|
711 |
// Start playing the next chunk if not already playing
|
712 |
playNextChunk();
|
713 |
-
|
714 |
} catch (error) {
|
715 |
console.error("Error during audio playback:", error);
|
716 |
window.isPlaying = false;
|
@@ -726,4 +738,3 @@ demo.queue()
|
|
726 |
|
727 |
#demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
|
728 |
asyncio.run(demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD"))))
|
729 |
-
|
|
|
47 |
audio_update_event = asyncio.Event()
|
48 |
acc_cosy_audio = None
|
49 |
# cosy voice tts related;
|
50 |
+
TTS_SOCKET_SERVER = "http://localhost:9444"
|
51 |
+
#TTS_SOCKET_SERVER = "http://astarwiz.com:9444"
|
52 |
|
53 |
sio = socketio.AsyncClient()
|
54 |
|
|
|
319 |
if "choices" in result:
|
320 |
return result["choices"][0]['text'].strip()
|
321 |
return "The system got some error during vLLM generation. Please try it again."
|
322 |
+
async def upload_file(file_path, upload_url):
|
323 |
+
print(f"1. Client sends request: {time.time()}")
|
324 |
+
async with aiohttp.ClientSession() as session:
|
325 |
+
with open(file_path, 'rb') as f:
|
326 |
+
form_data = aiohttp.FormData()
|
327 |
+
form_data.add_field('file', f, filename=os.path.basename(file_path))
|
328 |
+
|
329 |
+
async with session.post(upload_url, data=form_data) as response:
|
330 |
+
print(f"5. Client receives headers: {time.time()}")
|
331 |
+
print(f"Status: {response.status}")
|
332 |
+
|
333 |
+
result = await response.json()
|
334 |
+
print(f"7. Client fully received and parsed response: {time.time()}")
|
335 |
+
if response.status == 200:
|
336 |
+
return result
|
337 |
+
else:
|
338 |
+
return {"file_id",""}
|
339 |
+
|
340 |
async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, target_speaker=None, progress_tracker=None):
|
341 |
global transcription_update, translation_update, audio_update, acc_cosy_audio,audio_update_event
|
342 |
transcription_update = {"content": "", "new": True}
|
|
|
387 |
server_url = TTS_SOCKET_SERVER
|
388 |
await sio.connect(server_url)
|
389 |
print(f"Connected to {server_url}")
|
390 |
+
|
391 |
+
|
392 |
+
# Handle the audio file
|
393 |
+
file_id=""
|
394 |
+
if audio and os.path.exists(audio):
|
395 |
+
print("upload_url")
|
396 |
+
upload_url = f"{server_url}/upload" # Adjust this URL as needed
|
397 |
+
print("before call upload_file:")
|
398 |
+
upload_result = await upload_file(audio, upload_url)
|
399 |
+
#print (type(upload_result))
|
400 |
+
print ("upload_result:", upload_result)
|
401 |
+
file_id = upload_result['file_id']
|
402 |
+
|
403 |
# use default voice
|
404 |
tts_request = {
|
405 |
'text': transcription,
|
406 |
'overwrite_prompt': False,
|
407 |
'promptText':"",
|
408 |
+
'promptAudio':file_id,
|
409 |
'sourceLang':source_lang,
|
410 |
'targetLang':target_lang
|
411 |
}
|
|
|
664 |
async (audioFilePath) => {
|
665 |
// Debug: Log received audio file path
|
666 |
console.log("Received audio file path:", audioFilePath);
|
|
|
667 |
if (!window.audioQueue) {
|
668 |
window.audioQueue = [];
|
669 |
window.isPlaying = false;
|
670 |
}
|
|
|
671 |
// Ensure the correct URL for the audio file is available
|
672 |
if (audioFilePath && audioFilePath.url) {
|
673 |
console.log("Processing audio file...");
|
|
|
674 |
try {
|
675 |
// Fetch and decode the audio file
|
676 |
const response = await fetch(audioFilePath.url);
|
|
|
678 |
console.error("Failed to fetch audio file:", response.statusText);
|
679 |
return;
|
680 |
}
|
|
|
681 |
const audioData = await response.arrayBuffer();
|
682 |
const audioContext = new AudioContext();
|
683 |
const decodedData = await audioContext.decodeAudioData(audioData);
|
|
|
684 |
// Split the decoded audio buffer into two chunks
|
685 |
const totalDuration = decodedData.duration;
|
686 |
const midPoint = Math.floor(decodedData.length / 2); // Midpoint for splitting
|
687 |
const sampleRate = decodedData.sampleRate;
|
|
|
688 |
// Create two separate AudioBuffers for each chunk
|
689 |
const firstHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, midPoint, sampleRate);
|
690 |
const secondHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, decodedData.length - midPoint, sampleRate);
|
|
|
691 |
// Copy data from original buffer to the two new buffers
|
692 |
for (let channel = 0; channel < decodedData.numberOfChannels; channel++) {
|
693 |
firstHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(0, midPoint), channel, 0);
|
694 |
secondHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(midPoint), channel, 0);
|
695 |
}
|
|
|
696 |
// Add both chunks to the queue
|
697 |
window.audioQueue.push(firstHalfBuffer);
|
698 |
window.audioQueue.push(secondHalfBuffer);
|
699 |
console.log("Two audio chunks added to queue. Queue length:", window.audioQueue.length);
|
|
|
700 |
// Function to play the next audio chunk from the queue
|
701 |
const playNextChunk = async () => {
|
702 |
console.log("Attempting to play next chunk. isPlaying:", window.isPlaying);
|
|
|
703 |
if (!window.isPlaying && window.audioQueue.length > 0) {
|
704 |
console.log("Starting playback...");
|
705 |
window.isPlaying = true;
|
|
|
706 |
// Get the next audio buffer from the queue
|
707 |
const audioBuffer = window.audioQueue.shift();
|
708 |
console.log("Playing audio chunk from buffer.");
|
|
|
709 |
const source = audioContext.createBufferSource();
|
710 |
source.buffer = audioBuffer;
|
711 |
source.connect(audioContext.destination);
|
|
|
712 |
// When the audio finishes playing, play the next chunk
|
713 |
source.onended = () => {
|
714 |
console.log("Audio chunk finished playing.");
|
715 |
window.isPlaying = false;
|
716 |
playNextChunk(); // Play the next audio chunk in the queue
|
717 |
};
|
|
|
718 |
source.start(0); // Start playing the current chunk
|
719 |
console.log("Audio chunk started.");
|
720 |
} else {
|
721 |
console.log("Already playing or queue is empty.");
|
722 |
}
|
723 |
};
|
|
|
724 |
// Start playing the next chunk if not already playing
|
725 |
playNextChunk();
|
|
|
726 |
} catch (error) {
|
727 |
console.error("Error during audio playback:", error);
|
728 |
window.isPlaying = false;
|
|
|
738 |
|
739 |
#demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
|
740 |
asyncio.run(demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD"))))
|
|