File size: 3,240 Bytes
ed9b1d3
 
 
9bde428
ed9b1d3
 
 
 
 
 
7cbd8b4
 
 
 
 
 
ed9b1d3
 
 
 
7cbd8b4
ed9b1d3
 
7cbd8b4
 
 
 
 
ed9b1d3
 
7cbd8b4
 
 
 
 
 
ed9b1d3
 
7cbd8b4
ed9b1d3
 
 
 
7cbd8b4
ed9b1d3
7cbd8b4
 
ed9b1d3
 
 
 
 
 
9bde428
ed9b1d3
 
9bde428
ed9b1d3
7cbd8b4
 
 
 
 
 
9bde428
7cbd8b4
 
 
 
9bde428
7cbd8b4
9bde428
 
7cbd8b4
 
9bde428
7cbd8b4
9bde428
 
 
7cbd8b4
 
 
 
9bde428
 
ed9b1d3
 
7cbd8b4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import speech_recognition as sr  # type: ignore
import os
import pygame  # type: ignore
import gradio as gr
from gtts import gTTS  # type: ignore
from pydub import AudioSegment
from pydub.playback import play

# Maps each recognized command keyword to the sound file played for it.
# The paths are relative, so the files must exist in the working directory;
# play_dog_sound() returns an error string for any file that is missing.
# "bark" is also the fallback sound used by dog_response() when no keyword
# is found in the command.
dog_sounds = {
    "sit": "dog_sit.mp3",
    "come": "dog_come.mp3",
    "fetch": "dog_fetch.mp3",
    "treat": "dog_treat.mp3",
    "play": "dog_play.mp3",
    "bark": "dog_bark.mp3"
}

# Shared speech recognizer used by recognize_speech().
recognizer = sr.Recognizer()
# The audio driver is selected before pygame.mixer.init() so the setting is
# already in place when the mixer starts.
# NOTE(review): with the dummy driver, playback is presumably inaudible on
# the host machine — confirm this is the intended behavior.
os.environ["SDL_AUDIODRIVER"] = "dummy"  # Prevents pygame audio errors in headless mode
pygame.mixer.init()

def recognize_speech(audio_file):
    """Transcribe an audio file to lower-case text.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile`` (the UI passes
            a file path). May be ``None`` or empty when no recording was
            supplied.

    Returns:
        The lower-cased transcript on success; otherwise a human-readable
        message describing why no transcript could be produced. Failures are
        reported through the return value rather than raised.
    """
    # Handle a missing recording explicitly instead of relying on the
    # generic exception handler below to describe it.
    if not audio_file:
        return "Sorry, no audio was provided."

    try:
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        # The audio is fully read at this point, so the file is closed
        # before the recognition request is made.
        return recognizer.recognize_google(audio).lower()
    except sr.UnknownValueError:
        return "Sorry, I could not understand your speech."
    except sr.RequestError:
        return "Sorry, the speech service is unavailable."
    except Exception as e:
        # Last-resort boundary: the caller expects a string, never a raise.
        return f"Error: {e}"

def dog_response(command):
    """Play the dog sound matching a command and build the reply.

    The first key of ``dog_sounds`` (in dictionary order) that occurs as a
    substring of ``command`` is selected. When no key occurs, the "bark"
    sound is used as the fallback.

    Args:
        command: Text to search for a command keyword. A falsy value
            (``None`` or ``""``) means there is nothing to process.

    Returns:
        A ``(keyword, message, speech_file)`` tuple. ``keyword`` is the
        matched key, ``"bark"`` for the fallback, or ``"unknown"`` when
        ``command`` is falsy. ``message`` describes what happened, or carries
        the playback error if the sound file could not be played.
        ``speech_file`` is the path returned by ``generate_speech``, or
        ``None`` when ``command`` is falsy.
    """
    if not command:
        return "unknown", "No command to process.", None

    # NOTE(review): this is a substring test, so e.g. "welcome" also
    # matches "come" — confirm whether whole-word matching is wanted.
    matched = next((key for key in dog_sounds if key in command), None)
    if matched is not None:
        keyword = matched
        message = f"Playing sound for {matched}"
        spoken = f"Woof! I heard you say {matched}"
    else:
        keyword = "bark"
        message = "No specific dog command recognized. Playing default bark sound."
        spoken = "Woof! I didn't recognize that, so I'll just bark!"

    # play_dog_sound() reports a missing file by returning an error string
    # instead of raising; surface it rather than claiming the sound played.
    playback_error = play_dog_sound(dog_sounds[keyword])
    if playback_error:
        message = playback_error

    return keyword, message, generate_speech(spoken)

def play_dog_sound(sound_file):
    """Play an audio file through the pygame mixer and wait for it to end.

    Args:
        sound_file: Path of the audio file to play.

    Returns:
        ``None`` once playback has finished, or an error message string when
        ``sound_file`` does not exist.
    """
    if not os.path.exists(sound_file):
        return f"Error: Sound file '{sound_file}' not found."

    pygame.mixer.music.load(sound_file)
    pygame.mixer.music.play()
    # Sleep between polls: spinning on get_busy() with a bare `continue`
    # keeps a CPU core fully busy for the whole duration of the sound.
    while pygame.mixer.music.get_busy():
        pygame.time.wait(50)
    return None

def generate_speech(text):
    """Synthesize ``text`` as English speech and return the saved file's path.

    The audio is always written to ``dog_response.mp3``, so each call
    overwrites the output of the previous one.
    """
    output_path = "dog_response.mp3"
    gTTS(text=text, lang="en").save(output_path)
    # The path is handed back so the Gradio audio output can serve the file.
    return output_path

def process_command(audio_file):
    """Run the full pipeline for one recording; used as the Gradio callback.

    Args:
        audio_file: Path of the recorded audio, or a falsy value when no
            recording is available.

    Returns:
        A ``(command, response_text, speech_file)`` tuple matching the
        interface's two text outputs and one audio output. ``speech_file``
        is ``None`` when there was nothing to process.
    """
    # Without a recording there is nothing to transcribe; return early so no
    # recognition request, sound playback or speech synthesis is triggered.
    if not audio_file:
        return "", "No audio received.", None

    command = recognize_speech(audio_file)
    # The matched keyword is not shown in the UI, so it is discarded here.
    _, response_text, speech_file = dog_response(command)
    return command, response_text, speech_file

# Gradio UI: records from the microphone, passes the recording's file path to
# process_command(), and shows its three results.
iface = gr.Interface(
    fn=process_command,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    # Outputs, in order: recognized command, response message, spoken reply.
    outputs=["text", "text", "audio"],
    # The emoji below were previously stored as mis-decoded bytes (mojibake)
    # and rendered as garbage; they are restored to the intended characters.
    title="🐶 Dog Command Recognition 🐶",
    description="🎤 Speak a command and let the dog respond! 🐕\n\nTry commands like 'sit', 'come', 'fetch', 'treat', 'play'",
    theme="default",
    live=True,
    css="""
    body { background-color: #f8f9fa; text-align: center; }
    .output-text { color: #ff4500; font-size: 20px; font-weight: bold; }
    .interface-title { color: #008080; font-size: 26px; font-weight: bold; }
    .interface-description { color: #2f4f4f; font-size: 18px; }
    """
)

# Start the web app only when the file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch(share=True)