"""Gradio voice assistant: answers questions about viazi lishe (orange-fleshed
sweet potatoes) in Swahili. Pipeline: microphone -> Whisper transcription ->
GPT-4 chat completion -> TTS audio reply."""

import os

import gradio as gr
from gtts import gTTS  # NOTE(review): imported but never used here — confirm before removing
from openai import OpenAI

# Requires OPENAI_API_KEY in the environment.
client = OpenAI()

# Sentinel strings returned by transcribe() on failure. Downstream stages
# compare against this single tuple so they can short-circuit instead of
# feeding an error message to the chat/TTS APIs. Keeping them in one place
# prevents the three copies from drifting out of sync.
ERROR_MESSAGES = (
    "No audio input received",
    "Error in transcription. Please try again.",
    "Audio file not found",
)


def transcribe(audio):
    """Transcribe recorded audio with OpenAI Whisper.

    Args:
        audio: Filepath string from the Gradio microphone component, or a
            tuple whose first element is the filepath. May be None if the
            user submitted without recording.

    Returns:
        The transcribed text, or one of ERROR_MESSAGES on failure.
    """
    try:
        if audio is None:
            return "No audio input received"
        # Gradio may hand us either a bare path or a (path, ...) tuple.
        audio_path = audio if isinstance(audio, str) else audio[0]
        if not os.path.exists(audio_path):
            return "Audio file not found"
        with open(audio_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
        return transcription.text
    except Exception as e:
        print(f"Transcription error: {str(e)}")
        return "Error in transcription. Please try again."


def generate_response(transcribed_text):
    """Generate a Swahili answer about viazi lishe via GPT-4.

    Args:
        transcribed_text: The user's question, or one of ERROR_MESSAGES.

    Returns:
        The assistant's reply text; error sentinels are passed straight
        through so the UI shows them to the user.
    """
    try:
        # Short-circuit: don't send transcription error messages to the model.
        if transcribed_text in ERROR_MESSAGES:
            return transcribed_text
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "system",
                    "content": "All your answers should be in swahili only, users undertands swahili only so here we start... Wewe ni mtaalamu wa viazi lishe na utajibu maswali yote kwa kiswahili tu!",
                },
                # Few-shot priming so the model stays in Swahili and on topic.
                {"role": "user", "content": "Mambo vipi?"},
                {"role": "assistant", "content": "Salama je una swali lolote kuhusu viazi lishe?"},
                {"role": "user", "content": "nini maana ya Viazi lishe?"},
                {
                    "role": "assistant",
                    "content": "viazi lishe ni Viazi vitamu vyenye rangi ya karoti kwa ndani ambavyo vina vitamin A kwa wingi",
                },
                {"role": "user", "content": transcribed_text},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"OpenAI API error: {str(e)}")
        return "Samahani, kuna hitilafu. Tafadhali jaribu tena."


def inference(text):
    """Convert reply text to speech with OpenAI TTS.

    Args:
        text: The reply to voice, or one of ERROR_MESSAGES.

    Returns:
        Path to the generated MP3 file, or None if text is an error
        sentinel or synthesis failed.
    """
    try:
        # Don't voice transcription error messages.
        if text in ERROR_MESSAGES:
            return None
        response = client.audio.speech.create(
            model="tts-1",
            voice="nova",
            input=text,
        )
        output_file = "tts_output.mp3"
        # write_to_file replaces the deprecated stream_to_file in openai>=1.x.
        response.write_to_file(output_file)
        return output_file
    except Exception as e:
        print(f"Text-to-speech error: {str(e)}")
        return None


def process_audio_and_respond(audio):
    """Full pipeline for the Gradio interface: audio in, (text, audio) out.

    Args:
        audio: Microphone recording filepath from the Gradio component.

    Returns:
        Tuple of (reply text, path to reply MP3 or None).
    """
    try:
        if audio is None:
            return "Tafadhali rekodi sauti yako kwanza", None
        text = transcribe(audio)
        print(f"Transcribed text: {text}")  # Debug print
        response_text = generate_response(text)
        print(f"Generated response: {response_text}")  # Debug print
        output_file = inference(response_text)
        return response_text, output_file
    except Exception as e:
        print(f"Process error: {str(e)}")
        return "Samahani, kuna hitilafu. Tafadhali jaribu tena.", None


demo = gr.Interface(
    fn=process_audio_and_respond,
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Bonyeza kitufe cha kurekodi na uliza swali lako",
    ),
    outputs=[
        gr.Textbox(label="Jibu (kwa njia ya maandishi)"),
        gr.Audio(label="Jibu kwa njia ya sauti (Bofya kusikiliza Jibu)"),
    ],
    description="Uliza Mtaalamu wetu swali lolote Kuhusu viazi Lishe",
    allow_flagging="never",
)

# Guard so importing this module (e.g. for tests) doesn't start the server.
if __name__ == "__main__":
    demo.launch()