File size: 3,219 Bytes
ed9b1d3
 
 
9bde428
ed9b1d3
 
 
 
7e536ad
ed9b1d3
7cbd8b4
 
 
 
 
 
ed9b1d3
 
 
 
7cbd8b4
ed9b1d3
 
7cbd8b4
 
 
 
 
ed9b1d3
 
7cbd8b4
 
 
 
 
 
ed9b1d3
 
7cbd8b4
ed9b1d3
 
 
7e536ad
 
 
 
 
 
 
 
 
 
ed9b1d3
 
 
 
 
 
9bde428
ed9b1d3
 
9bde428
ed9b1d3
7cbd8b4
 
 
 
 
 
9bde428
7cbd8b4
7e536ad
7cbd8b4
7e536ad
 
 
 
9bde428
7cbd8b4
9bde428
 
7cbd8b4
7e536ad
9bde428
7cbd8b4
9bde428
 
ed9b1d3
 
7cbd8b4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import speech_recognition as sr  # type: ignore
import os
import pygame  # type: ignore
import gradio as gr
from gtts import gTTS  # type: ignore
from pydub import AudioSegment
from pydub.playback import play

# Lookup table: spoken keyword -> audio clip that answers it.
# The clips are referenced by bare (relative) file name, so make sure the
# files exist in the same directory as the app.
dog_sounds = {
    keyword: f"dog_{keyword}.mp3"
    for keyword in ("sit", "come", "fetch", "treat", "play", "bark")
}

# Select SDL's "dummy" audio driver *before* the mixer starts, which
# prevents pygame audio errors in headless mode.
os.environ["SDL_AUDIODRIVER"] = "dummy"
pygame.mixer.init()

# Module-level speech recognizer shared by the functions below.
recognizer = sr.Recognizer()

def recognize_speech(audio_file):
    """Transcribe an audio file to lower-case text.

    Args:
        audio_file: Value handed straight to ``sr.AudioFile`` (the UI
            supplies a file path).

    Returns:
        The lower-cased transcript on success. On failure a human-readable
        message is returned instead: ordinary exceptions are reported
        through the return value rather than propagated.
    """
    try:
        with sr.AudioFile(audio_file) as clip:
            captured = recognizer.record(clip)
            return recognizer.recognize_google(captured).lower()
    except sr.UnknownValueError:
        # The recognizer could not make sense of the audio.
        return "Sorry, I could not understand your speech."
    except sr.RequestError:
        # The recognition request itself failed.
        return "Sorry, the speech service is unavailable."
    except Exception as exc:
        return f"Error: {exc!s}"

def dog_response(command):
    """Play the dog sound matching *command* and build a spoken reply.

    The command is scanned for the keywords of ``dog_sounds`` in the table's
    order; the first keyword found wins. When none is found, the default
    "bark" clip is used.

    Args:
        command: Recognized text; may be empty or ``None``.

    Returns:
        A ``(sound_file, message, speech_file)`` tuple. ``sound_file`` and
        ``speech_file`` are audio file paths, or ``None`` when there is no
        command or the clip could not be played; ``message`` is the status
        text for the user.
    """
    if not command:
        return None, "No command to process.", None

    # NOTE(review): this is a substring test, so e.g. "come" also matches
    # inside "welcome" - confirm whether whole-word matching is wanted.
    matched = next((key for key in dog_sounds if key in command), None)
    if matched is None:
        # No specific command recognized: fall back to the bark clip.
        sound_file = dog_sounds["bark"]
        message = "No specific dog command recognized. Playing default bark sound."
        reply = "Woof! I didn't recognize that, so I'll just bark!"
    else:
        sound_file = dog_sounds[matched]
        message = f"Playing sound for {matched}"
        reply = f"Woof! I heard you say {matched}"

    # play_dog_sound reports a failure by returning a message instead of
    # raising. Surface it rather than claiming success and handing the UI
    # the path of a clip that could not be played.
    playback_error = play_dog_sound(sound_file)
    if playback_error:
        return None, playback_error, None

    return sound_file, message, generate_speech(reply)

def play_dog_sound(sound_file):
    """Play an audio file through the pygame mixer and wait until it ends.

    Args:
        sound_file: Path of the clip to play.

    Returns:
        ``None`` once playback has finished, or an error message string
        when *sound_file* does not exist.
    """
    if not os.path.exists(sound_file):
        return f"Error: Sound file '{sound_file}' not found."

    pygame.mixer.music.load(sound_file)
    pygame.mixer.music.play()
    # Sleep between polls: a bare ``continue`` loop would keep a CPU core
    # fully busy for the whole length of the clip.
    while pygame.mixer.music.get_busy():
        pygame.time.wait(50)
    return None

def generate_speech(text):
    """Synthesize *text* as English speech and return the saved file's path.

    Every call saves to the same relative path, ``dog_response.mp3``.

    Args:
        text: Sentence to be spoken.

    Returns:
        The path the audio was saved to.
    """
    output_path = "dog_response.mp3"
    gTTS(text=text, lang="en").save(output_path)
    return output_path  # Handed to Gradio as an audio output

def process_command(audio_file):
    """Run the whole pipeline for one recording: transcribe, then respond.

    Args:
        audio_file: Path of the recorded audio, or ``None``/empty when
            there is no recording to process.

    Returns:
        A ``(sound_file, response_text, speech_file)`` tuple, in the order
        of the interface's three outputs (dog sound, status text, spoken
        reply).
    """
    # Nothing recorded means nothing to transcribe. Without this guard the
    # missing input comes back from recognize_speech as an "Error: ..."
    # string, which is then answered with the default bark as if it were a
    # spoken command.
    if not audio_file:
        return None, "No command to process.", None

    command = recognize_speech(audio_file)
    # dog_response already yields the dog sound file as the first element.
    return dog_response(command)

# Gradio UI
# A microphone recording is passed to process_command as a file path; the
# three outputs are the dog sound, the status text and the spoken reply.
iface = gr.Interface(
    fn=process_command,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=["audio", "text", "audio"],  # First box plays the dog sound
    # The emoji are written as \U escapes (dog face, microphone, dog) so
    # they survive being saved or served with the wrong encoding; they had
    # previously degraded into mojibake.
    title="\U0001F436 Dog Command Recognition \U0001F436",
    description=(
        "\U0001F3A4 Speak a command and let the dog respond! \U0001F415\n\n"
        "Try commands like 'sit', 'come', 'fetch', 'treat', 'play'"
    ),
    live=True,
)

if __name__ == "__main__":
    # share=True requests a publicly reachable link in addition to the
    # local server.
    iface.launch(share=True)