File size: 3,671 Bytes
ed9b1d3
 
 
9bde428
ed9b1d3
 
7e536ad
ed9b1d3
7cbd8b4
 
 
 
 
8d5e69c
 
 
ed9b1d3
 
 
 
7cbd8b4
ed9b1d3
 
7cbd8b4
 
 
 
 
ed9b1d3
 
7cbd8b4
 
 
 
 
 
ed9b1d3
 
7cbd8b4
ed9b1d3
 
 
7e536ad
 
 
 
 
 
 
 
 
 
ed9b1d3
 
 
 
 
 
9bde428
ed9b1d3
 
9bde428
ed9b1d3
7cbd8b4
 
 
 
 
 
9bde428
7cbd8b4
7e536ad
7cbd8b4
7e536ad
 
 
 
9bde428
44fe398
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bde428
 
7cbd8b4
44fe398
9bde428
44fe398
9bde428
44fe398
 
9bde428
ed9b1d3
 
7cbd8b4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import speech_recognition as sr  # type: ignore
import os
import pygame  # type: ignore
import gradio as gr
from gtts import gTTS  # type: ignore

# Keyword -> audio clip lookup. The values are bare file names, so the clips
# must exist in the same directory for playback to find them.
dog_sounds = dict(
    sit="dog_sit.mp3",
    come="dog_come.mp3",
    fetch="dog_fetch.mp3",
    treat="dog_treat.mp3",
    play="dog_play.mp3",
    bark="dog_bark.mp3",
    angry="mixkit-angry-and-agitated-dog-growling-53.wav",
    happy="mixkit-happy-puppy-barks-741.wav",
)

# Audio backend setup. The dummy SDL driver must be selected before the mixer
# is initialised; it prevents pygame audio errors in headless mode.
os.environ["SDL_AUDIODRIVER"] = "dummy"
pygame.mixer.init()

# Shared speech recognizer used by recognize_speech().
recognizer = sr.Recognizer()

def recognize_speech(audio_file):
    """Transcribe an uploaded audio file into a lower-cased text command.

    Args:
        audio_file: Audio file handed to ``sr.AudioFile`` for reading.

    Returns:
        The recognized text in lower case. On failure a human-readable
        message string is returned instead of raising: one for speech that
        could not be understood, one for a failed recognition request, and a
        generic ``"Error: ..."`` message for anything else.
    """
    try:
        with sr.AudioFile(audio_file) as audio_source:
            # Read the whole file, then hand it to the recognition service.
            recording = recognizer.record(audio_source)
            transcript = recognizer.recognize_google(recording)
            return transcript.lower()
    except sr.UnknownValueError:
        message = "Sorry, I could not understand your speech."
    except sr.RequestError:
        message = "Sorry, the speech service is unavailable."
    except Exception as err:
        # Last-resort boundary: report the failure as text rather than crash.
        message = f"Error: {err!s}"
    return message

def dog_response(command):
    """Choose a dog sound for ``command``, play it, and build the replies.

    The first keyword of ``dog_sounds`` (in dictionary order) that occurs
    anywhere inside ``command`` wins; when none occurs, the ``"bark"`` clip is
    used as the default.

    Args:
        command: The recognized command text; may be empty or ``None``.

    Returns:
        A ``(sound_file, message, speech_file)`` tuple: the clip that was
        played, a status message, and the file produced by
        ``generate_speech``. For an empty command the tuple is
        ``(None, "No command to process.", None)``.
    """
    # Guard clause: nothing was said, so there is nothing to play or speak.
    if not command:
        return None, "No command to process.", None

    matched = next((word for word in dog_sounds if word in command), None)

    if matched is None:
        # No known keyword in the command: fall back to the default bark.
        clip = dog_sounds["bark"]
        status = "No specific dog command recognized. Playing default bark sound."
        spoken = "Woof! I didn't recognize that, so I'll just bark!"
    else:
        clip = dog_sounds[matched]
        status = f"Playing sound for {matched}"
        spoken = f"Woof! I heard you say {matched}"

    # Play the clip first, then synthesize the spoken reply.
    play_dog_sound(clip)
    return clip, status, generate_speech(spoken)

def play_dog_sound(sound_file):
    """Play an audio file with the pygame mixer, blocking until it finishes.

    Args:
        sound_file: Path of the audio file to play.

    Returns:
        ``None`` once playback has finished, or an error message string when
        ``sound_file`` does not exist.
    """
    if not os.path.exists(sound_file):
        return f"Error: Sound file '{sound_file}' not found."

    pygame.mixer.music.load(sound_file)
    pygame.mixer.music.play()
    # Sleep between polls rather than spinning, so waiting for the clip to end
    # does not keep a CPU core fully busy.
    while pygame.mixer.music.get_busy():
        pygame.time.wait(50)
    return None

def generate_speech(text):
    """Synthesize ``text`` to an MP3 file and return that file's path.

    Args:
        text: The sentence to convert to speech (English).

    Returns:
        The path of the saved audio file, ``"dog_response.mp3"``; the same
        file name is written on every call.
    """
    output_path = "dog_response.mp3"
    gTTS(text=text, lang="en").save(output_path)
    # The path is handed back so Gradio can serve the audio.
    return output_path

def process_command(audio_file):
    """Run the full pipeline for one recording: recognize, then respond.

    Args:
        audio_file: The recorded audio to transcribe, or ``None``/empty when
            no recording is available.

    Returns:
        A ``(sound_file, response_text, speech_file)`` tuple matching the
        interface's three outputs (dog sound first). When there is no audio,
        ``(None, "No command to process.", None)`` is returned.
    """
    # Without a recording there is nothing to transcribe; bail out instead of
    # letting the recognition error text be treated as a spoken command.
    if not audio_file:
        return None, "No command to process.", None

    command = recognize_speech(audio_file)
    return dog_response(command)

# Custom CSS for the Gradio UI (purple theme): styles the page body, the
# .gradio-container wrapper, and buttons. Passed to the interface via `css=`.
custom_css = """
body {
    background-color: #6a0dad;
    color: white;
    text-align: center;
}
.gradio-container {
    font-family: 'Arial', sans-serif;
    background-color: #800080;
    padding: 20px;
    border-radius: 15px;
}
button {
    background-color: #ffccff !important;
    color: #6a0dad !important;
    font-weight: bold !important;
}
"""

# Gradio interface: one microphone recording in; the dog sound clip, a status
# message, and the spoken reply out (same order as process_command's tuple).
iface = gr.Interface(
    fn=process_command,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=["audio", "text", "audio"],
    # The emoji are written as \U escapes (dog face, microphone, dog) so they
    # cannot be garbled if the file is read or saved with the wrong encoding.
    title="\U0001F436 Dog Command Recognition \U0001F436",
    description=(
        "\U0001F3A4 Speak a command and let the dog respond! \U0001F415\n\n"
        "Try commands like 'happy', 'angry', 'sit', 'come', 'fetch', 'treat', 'play'"
    ),
    live=True,
    theme="compact",
    css=custom_css,
)

if __name__ == "__main__":
    # share=True requests a shareable link from Gradio when launching.
    iface.launch(share=True)