Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -47,8 +47,8 @@ AVAILABLE_SPEAKERS = {
|
|
47 |
audio_update_event = asyncio.Event()
|
48 |
acc_cosy_audio = None
|
49 |
# cosy voice tts related;
|
50 |
-
|
51 |
-
TTS_SOCKET_SERVER = "http://astarwiz.com:9444"
|
52 |
|
53 |
sio = socketio.AsyncClient()
|
54 |
|
@@ -319,7 +319,24 @@ async def inference_via_llm_api(input_text, min_new_tokens=2, max_new_tokens=64)
|
|
319 |
if "choices" in result:
|
320 |
return result["choices"][0]['text'].strip()
|
321 |
return "The system got some error during vLLM generation. Please try it again."
|
322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, target_speaker=None, progress_tracker=None):
|
324 |
global transcription_update, translation_update, audio_update, acc_cosy_audio,audio_update_event
|
325 |
transcription_update = {"content": "", "new": True}
|
@@ -370,14 +387,25 @@ async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None
|
|
370 |
server_url = TTS_SOCKET_SERVER
|
371 |
await sio.connect(server_url)
|
372 |
print(f"Connected to {server_url}")
|
373 |
-
|
374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
# use default voice
|
376 |
tts_request = {
|
377 |
'text': transcription,
|
378 |
'overwrite_prompt': False,
|
379 |
'promptText':"",
|
380 |
-
'promptAudio':
|
381 |
'sourceLang':source_lang,
|
382 |
'targetLang':target_lang
|
383 |
}
|
@@ -636,16 +664,13 @@ with gr.Blocks() as demo:
|
|
636 |
async (audioFilePath) => {
|
637 |
// Debug: Log received audio file path
|
638 |
console.log("Received audio file path:", audioFilePath);
|
639 |
-
|
640 |
if (!window.audioQueue) {
|
641 |
window.audioQueue = [];
|
642 |
window.isPlaying = false;
|
643 |
}
|
644 |
-
|
645 |
// Ensure the correct URL for the audio file is available
|
646 |
if (audioFilePath && audioFilePath.url) {
|
647 |
console.log("Processing audio file...");
|
648 |
-
|
649 |
try {
|
650 |
// Fetch and decode the audio file
|
651 |
const response = await fetch(audioFilePath.url);
|
@@ -653,64 +678,51 @@ with gr.Blocks() as demo:
|
|
653 |
console.error("Failed to fetch audio file:", response.statusText);
|
654 |
return;
|
655 |
}
|
656 |
-
|
657 |
const audioData = await response.arrayBuffer();
|
658 |
const audioContext = new AudioContext();
|
659 |
const decodedData = await audioContext.decodeAudioData(audioData);
|
660 |
-
|
661 |
// Split the decoded audio buffer into two chunks
|
662 |
const totalDuration = decodedData.duration;
|
663 |
const midPoint = Math.floor(decodedData.length / 2); // Midpoint for splitting
|
664 |
const sampleRate = decodedData.sampleRate;
|
665 |
-
|
666 |
// Create two separate AudioBuffers for each chunk
|
667 |
const firstHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, midPoint, sampleRate);
|
668 |
const secondHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, decodedData.length - midPoint, sampleRate);
|
669 |
-
|
670 |
// Copy data from original buffer to the two new buffers
|
671 |
for (let channel = 0; channel < decodedData.numberOfChannels; channel++) {
|
672 |
firstHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(0, midPoint), channel, 0);
|
673 |
secondHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(midPoint), channel, 0);
|
674 |
}
|
675 |
-
|
676 |
// Add both chunks to the queue
|
677 |
window.audioQueue.push(firstHalfBuffer);
|
678 |
window.audioQueue.push(secondHalfBuffer);
|
679 |
console.log("Two audio chunks added to queue. Queue length:", window.audioQueue.length);
|
680 |
-
|
681 |
// Function to play the next audio chunk from the queue
|
682 |
const playNextChunk = async () => {
|
683 |
console.log("Attempting to play next chunk. isPlaying:", window.isPlaying);
|
684 |
-
|
685 |
if (!window.isPlaying && window.audioQueue.length > 0) {
|
686 |
console.log("Starting playback...");
|
687 |
window.isPlaying = true;
|
688 |
-
|
689 |
// Get the next audio buffer from the queue
|
690 |
const audioBuffer = window.audioQueue.shift();
|
691 |
console.log("Playing audio chunk from buffer.");
|
692 |
-
|
693 |
const source = audioContext.createBufferSource();
|
694 |
source.buffer = audioBuffer;
|
695 |
source.connect(audioContext.destination);
|
696 |
-
|
697 |
// When the audio finishes playing, play the next chunk
|
698 |
source.onended = () => {
|
699 |
console.log("Audio chunk finished playing.");
|
700 |
window.isPlaying = false;
|
701 |
playNextChunk(); // Play the next audio chunk in the queue
|
702 |
};
|
703 |
-
|
704 |
source.start(0); // Start playing the current chunk
|
705 |
console.log("Audio chunk started.");
|
706 |
} else {
|
707 |
console.log("Already playing or queue is empty.");
|
708 |
}
|
709 |
};
|
710 |
-
|
711 |
// Start playing the next chunk if not already playing
|
712 |
playNextChunk();
|
713 |
-
|
714 |
} catch (error) {
|
715 |
console.error("Error during audio playback:", error);
|
716 |
window.isPlaying = false;
|
@@ -726,4 +738,3 @@ demo.queue()
|
|
726 |
|
727 |
#demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
|
728 |
asyncio.run(demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD"))))
|
729 |
-
|
|
|
47 |
audio_update_event = asyncio.Event()
|
48 |
acc_cosy_audio = None
|
49 |
# cosy voice tts related;
|
50 |
+
TTS_SOCKET_SERVER = "http://localhost:9444"
|
51 |
+
#TTS_SOCKET_SERVER = "http://astarwiz.com:9444"
|
52 |
|
53 |
sio = socketio.AsyncClient()
|
54 |
|
|
|
319 |
if "choices" in result:
|
320 |
return result["choices"][0]['text'].strip()
|
321 |
return "The system got some error during vLLM generation. Please try it again."
|
322 |
+
async def upload_file(file_path, upload_url):
|
323 |
+
print(f"1. Client sends request: {time.time()}")
|
324 |
+
async with aiohttp.ClientSession() as session:
|
325 |
+
with open(file_path, 'rb') as f:
|
326 |
+
form_data = aiohttp.FormData()
|
327 |
+
form_data.add_field('file', f, filename=os.path.basename(file_path))
|
328 |
+
|
329 |
+
async with session.post(upload_url, data=form_data) as response:
|
330 |
+
print(f"5. Client receives headers: {time.time()}")
|
331 |
+
print(f"Status: {response.status}")
|
332 |
+
|
333 |
+
result = await response.json()
|
334 |
+
print(f"7. Client fully received and parsed response: {time.time()}")
|
335 |
+
if response.status == 200:
|
336 |
+
return result
|
337 |
+
else:
|
338 |
+
return {"file_id",""}
|
339 |
+
|
340 |
async def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, target_speaker=None, progress_tracker=None):
|
341 |
global transcription_update, translation_update, audio_update, acc_cosy_audio,audio_update_event
|
342 |
transcription_update = {"content": "", "new": True}
|
|
|
387 |
server_url = TTS_SOCKET_SERVER
|
388 |
await sio.connect(server_url)
|
389 |
print(f"Connected to {server_url}")
|
390 |
+
|
391 |
+
|
392 |
+
# Handle the audio file
|
393 |
+
file_id=""
|
394 |
+
if audio and os.path.exists(audio):
|
395 |
+
print("upload_url")
|
396 |
+
upload_url = f"{server_url}/upload" # Adjust this URL as needed
|
397 |
+
print("before call upload_file:")
|
398 |
+
upload_result = await upload_file(audio, upload_url)
|
399 |
+
#print (type(upload_result))
|
400 |
+
print ("upload_result:", upload_result)
|
401 |
+
file_id = upload_result['file_id']
|
402 |
+
|
403 |
# use default voice
|
404 |
tts_request = {
|
405 |
'text': transcription,
|
406 |
'overwrite_prompt': False,
|
407 |
'promptText':"",
|
408 |
+
'promptAudio':file_id,
|
409 |
'sourceLang':source_lang,
|
410 |
'targetLang':target_lang
|
411 |
}
|
|
|
664 |
async (audioFilePath) => {
|
665 |
// Debug: Log received audio file path
|
666 |
console.log("Received audio file path:", audioFilePath);
|
|
|
667 |
if (!window.audioQueue) {
|
668 |
window.audioQueue = [];
|
669 |
window.isPlaying = false;
|
670 |
}
|
|
|
671 |
// Ensure the correct URL for the audio file is available
|
672 |
if (audioFilePath && audioFilePath.url) {
|
673 |
console.log("Processing audio file...");
|
|
|
674 |
try {
|
675 |
// Fetch and decode the audio file
|
676 |
const response = await fetch(audioFilePath.url);
|
|
|
678 |
console.error("Failed to fetch audio file:", response.statusText);
|
679 |
return;
|
680 |
}
|
|
|
681 |
const audioData = await response.arrayBuffer();
|
682 |
const audioContext = new AudioContext();
|
683 |
const decodedData = await audioContext.decodeAudioData(audioData);
|
|
|
684 |
// Split the decoded audio buffer into two chunks
|
685 |
const totalDuration = decodedData.duration;
|
686 |
const midPoint = Math.floor(decodedData.length / 2); // Midpoint for splitting
|
687 |
const sampleRate = decodedData.sampleRate;
|
|
|
688 |
// Create two separate AudioBuffers for each chunk
|
689 |
const firstHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, midPoint, sampleRate);
|
690 |
const secondHalfBuffer = audioContext.createBuffer(decodedData.numberOfChannels, decodedData.length - midPoint, sampleRate);
|
|
|
691 |
// Copy data from original buffer to the two new buffers
|
692 |
for (let channel = 0; channel < decodedData.numberOfChannels; channel++) {
|
693 |
firstHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(0, midPoint), channel, 0);
|
694 |
secondHalfBuffer.copyToChannel(decodedData.getChannelData(channel).slice(midPoint), channel, 0);
|
695 |
}
|
|
|
696 |
// Add both chunks to the queue
|
697 |
window.audioQueue.push(firstHalfBuffer);
|
698 |
window.audioQueue.push(secondHalfBuffer);
|
699 |
console.log("Two audio chunks added to queue. Queue length:", window.audioQueue.length);
|
|
|
700 |
// Function to play the next audio chunk from the queue
|
701 |
const playNextChunk = async () => {
|
702 |
console.log("Attempting to play next chunk. isPlaying:", window.isPlaying);
|
|
|
703 |
if (!window.isPlaying && window.audioQueue.length > 0) {
|
704 |
console.log("Starting playback...");
|
705 |
window.isPlaying = true;
|
|
|
706 |
// Get the next audio buffer from the queue
|
707 |
const audioBuffer = window.audioQueue.shift();
|
708 |
console.log("Playing audio chunk from buffer.");
|
|
|
709 |
const source = audioContext.createBufferSource();
|
710 |
source.buffer = audioBuffer;
|
711 |
source.connect(audioContext.destination);
|
|
|
712 |
// When the audio finishes playing, play the next chunk
|
713 |
source.onended = () => {
|
714 |
console.log("Audio chunk finished playing.");
|
715 |
window.isPlaying = false;
|
716 |
playNextChunk(); // Play the next audio chunk in the queue
|
717 |
};
|
|
|
718 |
source.start(0); // Start playing the current chunk
|
719 |
console.log("Audio chunk started.");
|
720 |
} else {
|
721 |
console.log("Already playing or queue is empty.");
|
722 |
}
|
723 |
};
|
|
|
724 |
// Start playing the next chunk if not already playing
|
725 |
playNextChunk();
|
|
|
726 |
} catch (error) {
|
727 |
console.error("Error during audio playback:", error);
|
728 |
window.isPlaying = false;
|
|
|
738 |
|
739 |
#demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD")))
|
740 |
asyncio.run(demo.launch(auth=(os.getenv("DEV_USER"), os.getenv("DEV_PWD"))))
|
|