Spaces:

cdcvd
/

Nutrition_diet_assistant

Sleeping

App Files Community

cdcvd committed on Dec 22, 2024

Commit

9ffdb83

verified ·

1 Parent(s): 5ddcb7c

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -102

app.py CHANGED Viewed

@@ -188,43 +188,43 @@ def get_chat_history_string(chat_memory):
     return history
 # تابع تبدیل متن به گفتار با استفاده از Azure Speech SDK
-def synthesize_speech(text):
-    try:
-        speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_API_KEY, region=AZURE_SPEECH_REGION)
-        speech_config.speech_synthesis_voice_name = AZURE_SPEECH_VOICE_NAME
-        # تنظیم فرمت خروجی صوتی
-        speech_config.set_speech_synthesis_output_format(
-            speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
-        )
-        # تولید نام فایل موقت برای ذخیره صوت
-        voice_generate_path = f'{uuid.uuid4()}.mp3'
-        temp_voice_generate_path = os.path.join(os.getcwd(), voice_generate_path)
-        # تنظیم خروجی به فایل
-        audio_config = speechsdk.audio.AudioOutputConfig(filename=temp_voice_generate_path)
-        speech_synthesizer = speechsdk.SpeechSynthesizer(
-            speech_config=speech_config,
-            audio_config=audio_config
-        )
-        speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
-        if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
-            # خواندن داده‌های صوتی از فایل
-            with open(temp_voice_generate_path, "rb") as audio_file:
-                audio_data = audio_file.read()
-            # حذف فایل موقت
-            os.remove(temp_voice_generate_path)
-            return audio_data
-        else:
-            print("خطا در تبدیل متن به گفتار:", speech_synthesis_result.reason)
-            return None
-    except Exception as e:
-        print("خطا در تبدیل متن به گفتار:", e)
-        return None
 def get_model_response(messages):
     response = client.chat.completions.create(
         model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
@@ -346,41 +346,41 @@ import requests
 #     else:
 #         return {"error": response.json()}
-def process_audio(audio):
-    audio_file = open(audio, "rb")
-    try:
-        audio = AudioSegment.from_file(audio_file)
-    except CouldntDecodeError:
-        os.remove(audio)
-        return "Unsupported audio format"
-    # بررسی مدت زمان فایل صوتی
-    duration_seconds = len(audio_file) / 1000.0  # مدت زمان به ثانیه
-    if duration_seconds > 900:
-        os.remove(audio)
-        return "Audio file is too long"
-    project_root = os.path.dirname(os.path.dirname(__file__))
-    voice_id = str(uuid.uuid4())
-    # تبدیل به فرمت WAV سازگار با Whisper
-    whisper_path = f'{voice_id}.wav'
-    whisper_voice_path = os.path.join(project_root, whisper_path)
-    audio.export(whisper_voice_path, format='wav')
-    # ارسال درخواست به API Hugging Face
-    output = query(whisper_voice_path)
-    # حذف فایل موقت
-    os.remove(whisper_voice_path)
-    if "error" in output:
-        print("Error:", output["error"])
-        return "Transcription failed"
-    else:
-        text_question = output.get("text", "")
-        print("text_question =", text_question)
-        return text_question
 # def process_audio(audio):
 #     # باز کردن فایل صوتی ضبط‌شده
@@ -433,14 +433,14 @@ def clear_memory():
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot(height=500)
     msg = gr.Textbox(show_label=False, placeholder="Send Message")
-    audio_input = gr.Audio(sources="microphone", type="filepath",label="Audio voice to voice")
-    audio_output = gr.Audio(label="Assistant voice response")  # افزودن کامپوننت صوتی برای خروجی
-    inputs=gr.Audio(sources="microphone", type="filepath",label="Audio voice to text")
     with gr.Row():
         submit_btn = gr.Button("send")
-        voice_btn = gr.Button("voice to voice")
         clear_btn = gr.Button("clear_chat 🧹")
-        voice_btn1 = gr.Button("voice to text")
     def fix_bidi_text(text):
         RLE = '\u202B'  # Right-To-Left Embedding
         PDF = '\u202C'  # Pop Directional Formatting
@@ -454,46 +454,46 @@ with gr.Blocks() as demo:
         chat_history.append((fixed_message, fixed_bot_response))
         # تبدیل پاسخ به صوت
-        audio_data = synthesize_speech(bot_response)
         return chat_history, ""  # افزودن audio_data به خروجی
-    def response_voice(audio, chat_history):
-        if not audio:
-            return chat_history, "فایل صوتی ارائه نشده است.", None
-        # پردازش فایل صوتی و دریافت متن تبدیل‌شده
-        transcribed_text = process_audio(audio)
-        # دریافت پاسخ مدل با استفاده از متن تبدیل‌شده
-        bot_response = agent_respond(transcribed_text)
-        # اصلاح متن‌ها برای نمایش راست‌چین
-        fixed_user_message = fix_bidi_text(transcribed_text)
-        fixed_bot_response = fix_bidi_text(bot_response)
-        # افزودن پیام‌ها به تاریخچه چت
-        chat_history.append((fixed_user_message, fixed_bot_response))
-        # تبدیل پاسخ به صوت
-        audio_data = synthesize_speech(bot_response)
-        return chat_history, "", audio_data
-        # افزودن audio_data به خروجی
-    def response_voice1(audio, chat_history):
-        if not audio:
-            return chat_history, "No audio file provided."
-        # پردازش فایل صوتی
-        bot_response = process_audio(audio)
-        fixed_bot_response = fix_bidi_text(bot_response)
-        chat_history.append(("صدا ارسال شد", fixed_bot_response))
-        return chat_history, ""
     submit_btn.click(respond, [msg, chatbot], [chatbot, msg])
-    voice_btn.click(response_voice, [audio_input, chatbot], [chatbot, msg, audio_output])
-    voice_btn1.click(response_voice1, [inputs, chatbot], [chatbot, msg])
     msg.submit(respond, [msg, chatbot], [chatbot, msg])
-    clear_btn.click(clear_memory, inputs=None, outputs=[chatbot, msg, audio_output,audio_input])
 # اجرای Gradio
 if __name__ == "__main__":

     return history
 # تابع تبدیل متن به گفتار با استفاده از Azure Speech SDK
+# def synthesize_speech(text):
+#     try:
+#         speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_API_KEY, region=AZURE_SPEECH_REGION)
+#         speech_config.speech_synthesis_voice_name = AZURE_SPEECH_VOICE_NAME
+#         # تنظیم فرمت خروجی صوتی
+#         speech_config.set_speech_synthesis_output_format(
+#             speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
+#         )
+#         # تولید نام فایل موقت برای ذخیره صوت
+#         voice_generate_path = f'{uuid.uuid4()}.mp3'
+#         temp_voice_generate_path = os.path.join(os.getcwd(), voice_generate_path)
+#         # تنظیم خروجی به فایل
+#         audio_config = speechsdk.audio.AudioOutputConfig(filename=temp_voice_generate_path)
+#         speech_synthesizer = speechsdk.SpeechSynthesizer(
+#             speech_config=speech_config,
+#             audio_config=audio_config
+#         )
+#         speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
+#         if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+#             # خواندن داده‌های صوتی از فایل
+#             with open(temp_voice_generate_path, "rb") as audio_file:
+#                 audio_data = audio_file.read()
+#             # حذف فایل موقت
+#             os.remove(temp_voice_generate_path)
+#             return audio_data
+#         else:
+#             print("خطا در تبدیل متن به گفتار:", speech_synthesis_result.reason)
+#             return None
+#     except Exception as e:
+#         print("خطا در تبدیل متن به گفتار:", e)
+#         return None
 def get_model_response(messages):
     response = client.chat.completions.create(
         model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
 #     else:
 #         return {"error": response.json()}
+# def process_audio(audio):
+#     audio_file = open(audio, "rb")
+#     try:
+#         audio = AudioSegment.from_file(audio_file)
+#     except CouldntDecodeError:
+#         os.remove(audio)
+#         return "Unsupported audio format"
+#     # بررسی مدت زمان فایل صوتی
+#     duration_seconds = len(audio_file) / 1000.0  # مدت زمان به ثانیه
+#     if duration_seconds > 900:
+#         os.remove(audio)
+#         return "Audio file is too long"
+#     project_root = os.path.dirname(os.path.dirname(__file__))
+#     voice_id = str(uuid.uuid4())
+#     # تبدیل به فرمت WAV سازگار با Whisper
+#     whisper_path = f'{voice_id}.wav'
+#     whisper_voice_path = os.path.join(project_root, whisper_path)
+#     audio.export(whisper_voice_path, format='wav')
+#     # ارسال درخواست به API Hugging Face
+#     output = query(whisper_voice_path)
+#     # حذف فایل موقت
+#     os.remove(whisper_voice_path)
+#     if "error" in output:
+#         print("Error:", output["error"])
+#         return "Transcription failed"
+#     else:
+#         text_question = output.get("text", "")
+#         print("text_question =", text_question)
+#         return text_question
 # def process_audio(audio):
 #     # باز کردن فایل صوتی ضبط‌شده
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot(height=500)
     msg = gr.Textbox(show_label=False, placeholder="Send Message")
+    # audio_input = gr.Audio(sources="microphone", type="filepath",label="Audio voice to voice")
+    # audio_output = gr.Audio(label="Assistant voice response")  # افزودن کامپوننت صوتی برای خروجی
+    # inputs=gr.Audio(sources="microphone", type="filepath",label="Audio voice to text")
     with gr.Row():
         submit_btn = gr.Button("send")
+        # voice_btn = gr.Button("voice to voice")
         clear_btn = gr.Button("clear_chat 🧹")
+        # voice_btn1 = gr.Button("voice to text")
     def fix_bidi_text(text):
         RLE = '\u202B'  # Right-To-Left Embedding
         PDF = '\u202C'  # Pop Directional Formatting
         chat_history.append((fixed_message, fixed_bot_response))
         # تبدیل پاسخ به صوت
+        # audio_data = synthesize_speech(bot_response)
         return chat_history, ""  # افزودن audio_data به خروجی
+    # def response_voice(audio, chat_history):
+    #     if not audio:
+    #         return chat_history, "فایل صوتی ارائه نشده است.", None
+    #     # پردازش فایل صوتی و دریافت متن تبدیل‌شده
+    #     transcribed_text = process_audio(audio)
+    #     # دریافت پاسخ مدل با استفاده از متن تبدیل‌شده
+    #     bot_response = agent_respond(transcribed_text)
+    #     # اصلاح متن‌ها برای نمایش راست‌چین
+    #     fixed_user_message = fix_bidi_text(transcribed_text)
+    #     fixed_bot_response = fix_bidi_text(bot_response)
+    #     # افزودن پیام‌ها به تاریخچه چت
+    #     chat_history.append((fixed_user_message, fixed_bot_response))
+    #     # تبدیل پاسخ به صوت
+    #     audio_data = synthesize_speech(bot_response)
+    #     return chat_history, "", audio_data
+    #     # افزودن audio_data به خروجی
+    # def response_voice1(audio, chat_history):
+    #     if not audio:
+    #         return chat_history, "No audio file provided."
+    #     # پردازش فایل صوتی
+    #     bot_response = process_audio(audio)
+    #     fixed_bot_response = fix_bidi_text(bot_response)
+    #     chat_history.append(("صدا ارسال شد", fixed_bot_response))
+    #     return chat_history, ""
     submit_btn.click(respond, [msg, chatbot], [chatbot, msg])
+    # voice_btn.click(response_voice, [audio_input, chatbot], [chatbot, msg, audio_output])
+    # voice_btn1.click(response_voice1, [inputs, chatbot], [chatbot, msg])
     msg.submit(respond, [msg, chatbot], [chatbot, msg])
+    clear_btn.click(clear_memory, inputs=None, outputs=[chatbot, msg])
 # اجرای Gradio
 if __name__ == "__main__":