import os

import pyttsx3
import sounddevice as sd
import soundfile as sf
import streamlit as st
from faster_whisper import WhisperModel
from langchain_community.llms import Ollama

# Allow duplicate OpenMP runtimes (common when faster-whisper and other
# bundled native libraries each ship their own copy).
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

SAMPLE_RATE = 44100
RECORD_SECONDS = 5
AUDIO_FILE = "recorded_audio.wav"

# Initialize the Whisper speech-to-text model and the Ollama LLM.
model = WhisperModel("base.en", device="cpu", compute_type="int8", num_workers=5)
llm = Ollama(model="tinyllama")

# Initialize the text-to-speech engine. pyttsx3.init() picks the platform
# driver automatically (SAPI5 on Windows, NSSpeechSynthesizer on macOS,
# eSpeak on Linux), so the driver name need not be hardcoded.
engine = pyttsx3.init()
voices = engine.getProperty("voices")
engine.setProperty("voice", voices[0].id)
engine.setProperty("rate", 180)


def speak(text):
    """Read the given text aloud, blocking until playback finishes."""
    engine.say(text)
    engine.runAndWait()


# Replay the previous recording, if one exists (st.audio would raise an
# error for a path that does not exist yet).
if os.path.exists(AUDIO_FILE):
    st.audio(AUDIO_FILE, format="audio/wav")

if st.button("Record"):
    # Record a fixed-length clip from the default microphone.
    with st.spinner("Recording..."):
        recording = sd.rec(
            int(RECORD_SECONDS * SAMPLE_RATE),
            samplerate=SAMPLE_RATE,
            channels=1,  # mono is sufficient for speech recognition
            dtype="int16",
        )
        sd.wait()  # block until the recording finishes
        sf.write(AUDIO_FILE, recording, samplerate=SAMPLE_RATE)
        st.audio(AUDIO_FILE, format="audio/wav")

    # Transcribe the recording; faster-whisper accepts a file path directly.
    segments, info = model.transcribe(AUDIO_FILE, beam_size=10)

    # Join all segments into a single prompt rather than querying the LLM
    # once per segment and stopping after the first.
    prompt = " ".join(segment.text.strip() for segment in segments)
    print(prompt)
    st.text(prompt)

    if prompt:
        # Send the transcript to the LLM, display the reply, and speak it.
        response = llm.invoke(prompt)
        st.success("Response: " + response)
        speak(response)
    else:
        st.error("Failed to transcribe audio.")
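
# Usage sketch. This assumes Ollama is installed and running locally with the
# tinyllama model pulled, and that the script is saved as app.py (a
# placeholder name; substitute the actual file name):
#
#   pip install streamlit sounddevice soundfile faster-whisper langchain-community pyttsx3
#   ollama pull tinyllama
#   streamlit run app.py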