Spaces:
Running
Running
File size: 3,219 Bytes
ed9b1d3 9bde428 ed9b1d3 7e536ad ed9b1d3 7cbd8b4 ed9b1d3 7cbd8b4 ed9b1d3 7cbd8b4 ed9b1d3 7cbd8b4 ed9b1d3 7cbd8b4 ed9b1d3 7e536ad ed9b1d3 9bde428 ed9b1d3 9bde428 ed9b1d3 7cbd8b4 9bde428 7cbd8b4 7e536ad 7cbd8b4 7e536ad 9bde428 7cbd8b4 9bde428 7cbd8b4 7e536ad 9bde428 7cbd8b4 9bde428 ed9b1d3 7cbd8b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import speech_recognition as sr # type: ignore
import os
import pygame # type: ignore
import gradio as gr
from gtts import gTTS # type: ignore
from pydub import AudioSegment
from pydub.playback import play
# Dog sound files (Ensure these files exist in the same directory)
# Maps each command keyword to the audio clip played in response.
# Insertion order matters: dog_response() picks the first keyword, in this
# order, that occurs in the recognised text.
dog_sounds = dict(
    (
        ("sit", "dog_sit.mp3"),
        ("come", "dog_come.mp3"),
        ("fetch", "dog_fetch.mp3"),
        ("treat", "dog_treat.mp3"),
        ("play", "dog_play.mp3"),
        ("bark", "dog_bark.mp3"),
    )
)
# Module-wide speech recognizer, used by recognize_speech().
recognizer = sr.Recognizer()
# Select SDL's "dummy" audio driver to prevent pygame audio errors in headless
# mode. This is set before pygame.mixer.init() runs on the next line.
os.environ["SDL_AUDIODRIVER"] = "dummy"
pygame.mixer.init()
def recognize_speech(audio_file):
    """Transcribe an uploaded audio file to lower-case text.

    Args:
        audio_file: The recording to transcribe, handed to ``sr.AudioFile``.
            A falsy value (``None`` or ``""``) means no recording was
            supplied.

    Returns:
        The lower-cased transcript on success, an empty string when no audio
        was supplied, or a human-readable message describing why recognition
        failed.
    """
    if not audio_file:
        # Nothing to transcribe. Return an empty command so the caller can
        # report "no command" rather than passing the missing file on to
        # sr.AudioFile and treating the resulting error text as speech.
        return ""

    try:
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        command = recognizer.recognize_google(audio)
        return command.lower()
    except sr.UnknownValueError:
        return "Sorry, I could not understand your speech."
    except sr.RequestError:
        return "Sorry, the speech service is unavailable."
    except Exception as e:
        # Last-resort boundary: report any other failure as text.
        return f"Error: {str(e)}"
def dog_response(command):
    """Play the dog sound matching *command* and build a spoken reply.

    The first keyword of ``dog_sounds`` (in dictionary order) that occurs as
    a substring of ``command`` selects the clip; when none occurs, the
    ``"bark"`` clip is used as the default.

    Args:
        command: Recognised text. A falsy value means there is nothing to
            process.

    Returns:
        A ``(sound_file, response_text, speech_file)`` tuple. ``sound_file``
        is ``None`` when there was no command or when the clip could not be
        played; in the latter case ``response_text`` carries the error
        reported by ``play_dog_sound``.
    """
    if not command:
        return None, "No command to process.", None

    matched = next((key for key in dog_sounds if key in command), None)
    if matched is None:
        # No specific command recognised: fall back to the default bark.
        sound_file = dog_sounds["bark"]
        response_text = "No specific dog command recognized. Playing default bark sound."
        speech_text = "Woof! I didn't recognize that, so I'll just bark!"
    else:
        sound_file = dog_sounds[matched]
        response_text = f"Playing sound for {matched}"
        speech_text = f"Woof! I heard you say {matched}"

    # play_dog_sound() returns an error message when the clip is missing.
    # Surface it instead of claiming the sound was played and handing the UI
    # a path that does not exist.
    playback_error = play_dog_sound(sound_file)
    speech_file = generate_speech(speech_text)
    if playback_error:
        return None, playback_error, speech_file
    return sound_file, response_text, speech_file
def play_dog_sound(sound_file):
    """Play an audio file with the pygame mixer, blocking until it finishes.

    Args:
        sound_file: Path of the audio file to play.

    Returns:
        ``None`` once playback has finished, or an error message string when
        ``sound_file`` does not exist.
    """
    if not os.path.exists(sound_file):
        return f"Error: Sound file '{sound_file}' not found."

    pygame.mixer.music.load(sound_file)
    pygame.mixer.music.play()
    # Sleep between polls rather than spinning in a tight loop, which would
    # keep a CPU core fully busy for the whole length of the clip.
    while pygame.mixer.music.get_busy():
        pygame.time.wait(50)
    return None
def generate_speech(text):
    """Synthesise *text* as English speech and return the saved file's path.

    The audio is always written to ``dog_response.mp3`` in the current
    working directory, so each call overwrites the previous result.
    """
    output_path = "dog_response.mp3"
    gTTS(text=text, lang="en").save(output_path)
    return output_path
def process_command(audio_file):
    """Run the pipeline for one recording: transcribe it, then respond.

    Args:
        audio_file: The recording supplied by the UI, or a falsy value
            (``None`` or ``""``) when there is no recording.

    Returns:
        A ``(sound_file, response_text, speech_file)`` tuple in the order of
        the interface's three outputs: the dog sound, the status message and
        the spoken reply.
    """
    if not audio_file:
        # No recording to work on: skip recognition and playback entirely
        # instead of responding to the recogniser's error text.
        return None, "No command to process.", None

    command = recognize_speech(audio_file)
    sound_file, response_text, speech_file = dog_response(command)
    return sound_file, response_text, speech_file
# Gradio UI
# The emoji are written as \U escapes so they survive a file being saved or
# read with the wrong encoding (the previous literals had decayed to mojibake).
# NOTE(review): the trailing emoji in the description had lost its final byte
# and could not be recovered exactly; U+1F415 (dog) is a best guess - confirm.
iface = gr.Interface(
    fn=process_command,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    # Order matches process_command's return: dog sound, message, spoken reply.
    outputs=["audio", "text", "audio"],
    title="\U0001F436 Dog Command Recognition \U0001F436",  # dog face
    description=(
        "\U0001F3A4 Speak a command and let the dog respond! \U0001F415\n\n"
        "Try commands like 'sit', 'come', 'fetch', 'treat', 'play'"
    ),
    live=True,
)
# Launch the web UI only when this file is executed as a script, not when it
# is imported. share=True is passed through to Gradio's launch().
if __name__ == "__main__":
    iface.launch(share=True)
|