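# Voice assistant sketch: record a short clip from the microphone in Streamlit,
# transcribe it locally with faster-whisper, feed the text to a local Ollama
# model, and read the reply aloud with pyttsx3.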
import io
import os

import pyttsx3
import sounddevice as sd
import soundfile as sf
import streamlit as st
from faster_whisper import WhisperModel
from langchain_community.llms import Ollama
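
# Workaround for the Intel OpenMP "duplicate runtime" crash (OMP: Error #15)
# that can occur when several native libraries each bundle their own libiomp.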
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
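
# Speech-to-text: a small English-only Whisper model on CPU. int8 quantization
# keeps memory use low; num_workers allows several transcriptions in parallel.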
model_size = "base.en"
model = WhisperModel(model_size, device="cpu", compute_type="int8", num_workers=5)
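
# Local chat model served by Ollama; assumes the Ollama server is running and
# the model has been pulled (`ollama pull tinyllama`).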
llm = Ollama(model="tinyllama")
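
# Text-to-speech: the SAPI5 driver is Windows-only; on macOS/Linux, call
# pyttsx3.init() with no argument so a platform driver is picked automatically.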
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[0].id)  # first installed voice
engine.setProperty('rate', 180)            # speech rate in words per minute


def speak(text):
    """Speak the given text aloud; runAndWait() blocks until playback ends."""
    engine.say(text)
    engine.runAndWait()

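
# Replay the previous recording, if one exists; st.audio errors when given a
# path to a local file that does not exist.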
if os.path.exists("recorded_audio.wav"):
    st.audio("recorded_audio.wav", format="audio/wav")

if st.button("Record"):
    # Capture five seconds of 16-bit stereo audio from the default input device.
    with st.spinner("Recording..."):
        recorded_audio = sd.rec(int(5 * 44100), samplerate=44100, channels=2, dtype="int16")
        sd.wait()  # block until the recording finishes
        sf.write("recorded_audio.wav", recorded_audio, samplerate=44100)

    st.audio("recorded_audio.wav", format="audio/wav")

    # Transcribe the fresh recording; transcribe() accepts file-like objects
    # and returns a lazy generator of segments plus metadata.
    with open("recorded_audio.wav", "rb") as audio_file:
        segments, info = model.transcribe(io.BytesIO(audio_file.read()), beam_size=10)

    # Join every segment into a single prompt; an empty string means no speech
    # was recognized, so guard before calling the LLM.
    prompt = " ".join(segment.text.strip() for segment in segments)
    print(prompt)
    st.text(prompt)

    if prompt:
        response = llm.invoke(prompt)
        st.success("Response: " + response)
        speak(response)
    else:
        st.error("Failed to transcribe audio.")
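
# Run the app with:
#   streamlit run <this_file>.py
# (assumes a local Ollama server with the tinyllama model already pulled)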