File size: 5,806 Bytes
d2dc763
9b5b26a
d2dc763
c19d193
d2dc763
 
 
 
 
6aae614
9b5b26a
cfb5578
 
9b5b26a
6b449ac
 
696c962
6dafb46
f08fb67
6dafb46
798b3c8
 
 
 
 
6dafb46
 
35cb1a5
 
 
 
 
 
 
 
6dafb46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696c962
f08fb67
6add177
f08fb67
c5886e3
f08fb67
f7035ff
35f68a0
c5886e3
 
 
 
 
f08fb67
6add177
 
f7035ff
f08fb67
953e326
2369056
f08fb67
8b67e5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13a73d4
8c01ffb
6dafb46
f08fb67
8b67e5c
f08fb67
6aae614
0759335
 
 
 
ae7a494
 
 
 
e121372
bf6d34c
 
29ec968
fe328e0
13d500a
8c01ffb
 
9b5b26a
 
8c01ffb
861422e
 
9b5b26a
8c01ffb
8fe992b
8b67e5c
8c01ffb
 
 
 
 
 
861422e
08267c1
8fe992b
8b67e5c
 
 
 
 
13bec1b
 
 
8b67e5c
 
 
13bec1b
8b67e5c
 
 
 
13bec1b
8b67e5c
 
13bec1b
8b67e5c
13bec1b
 
8b67e5c
20d75d2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import os
import requests
import random
import yaml
import datetime
import pytz
import gradio as gr
from gtts import gTTS  # Use Google TTS instead of pyttsx3
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
from tools.web_search import DuckDuckGoSearchTool
from tools.visit_webpage import VisitWebpageTool

from Code_Functions import speak_text


from smolagents.agent_types import AgentText
from smolagents.agent_types import AgentAudio

import soundfile
import io
import librosa
import numpy as np

@tool
def lookup_definition(query: str) -> AgentText:
    """Fetches the definition of a word from the Dictionary API and returns it as AgentText.

    Args:
        query: The word to look up.

    Returns:
        A text response: the word, its origin and a bulleted list of definitions,
        "No definition found." when the API yields nothing usable, or an
        "Error fetching definition: ..." message when the request fails.
    """
    from urllib.parse import quote

    # Percent-encode the word so spaces, '/', '?' or '#' cannot alter the request path.
    url = f"https://api.dictionaryapi.dev/api/v2/entries/en/{quote(query, safe='')}"
    try:
        # A timeout keeps the tool from hanging forever on a stalled connection.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decoding error is not a RequestException subclass.
        return AgentText(f"Error fetching definition: {str(e)}")

    # The code below indexes data[0], so anything but a non-empty list is unusable.
    if not data or not isinstance(data, list):
        return AgentText("No definition found.")

    entry = data[0]
    word = entry.get("word", "Unknown word")
    origin = entry.get("origin", "Origin not available")
    definitions = [
        f"({meaning.get('partOfSpeech', 'Unknown')}) {definition['definition']}"
        for meaning in entry.get("meanings", [])
        for definition in meaning.get("definitions", [])
        if definition.get("definition")  # skip entries without definition text
    ]
    if not definitions:
        response_text = f"Word: {word}\nOrigin: {origin}\nNo definitions found."
    else:
        response_text = f"Word: {word}\nOrigin: {origin}\nDefinitions:\n- " + "\n- ".join(definitions)

    return AgentText(response_text)







@tool
def text_to_speech(text: str) -> AgentAudio:
    """
    Converts input text to speech and returns an AgentAudio instance with the audio file path.
    Args:
        text: The text string with word definition that will be converted into speech.
    Returns:
        AgentAudio: An AgentAudio instance containing the file path to the generated audio.
    """
    from gtts import gTTS

    # Every call saves to this same fixed location.
    output_path = "/tmp/response.mp3"
    gTTS(text=text, lang='en').save(output_path)
    return AgentAudio(output_path)


# File that speak_text saves its MP3 output to (the same path on every call)
AUDIO_OUTPUT_PATH = "/tmp/response.mp3"

# NOTE: this definition rebinds the name `speak_text` that is imported from
# Code_Functions near the top of the file.
def speak_text(text):
    """Synthesize English speech for ``text`` with gTTS and save it as an MP3.

    Args:
        text: The text to convert to speech.

    Returns:
        AUDIO_OUTPUT_PATH, the path of the saved file, suitable for a Gradio
        Audio component.
    """
    gTTS(text=text, lang='en').save(AUDIO_OUTPUT_PATH)
    return AUDIO_OUTPUT_PATH



@tool
def search_dad_jokes(term: str) -> str:
    """A tool that searches for dad jokes containing a specific term.
    Args:
        term: The keyword to search for in dad jokes.

    Returns:
        The text listing the jokes found (or a "no jokes"/error message) together
        with the path of an MP3 file in which that text is read aloud.
    """
    # NOTE(review): the value actually returned is a (text, audio file path) tuple,
    # which gradio_search_jokes unpacks; the `-> str` annotation is left untouched
    # so the tool's declared interface stays as it was.
    headers = {
        "Accept": "application/json",
        "User-Agent": "YourAppName (https://yourappurl.com)"
    }
    try:
        response = requests.get(
            "https://icanhazdadjoke.com/search",
            # Let requests build the query string so that spaces, '&' or '#'
            # in the term are percent-encoded instead of corrupting the URL.
            params={"term": term},
            headers=headers,
            timeout=10,  # do not hang forever on a stalled connection
        )
        # Surface HTTP errors directly instead of failing later on an error body.
        response.raise_for_status()
        data = response.json()
        jokes = [joke['joke'] for joke in data['results']]
        if jokes:
            response_text = f"Found {len(jokes)} jokes:\n" + "\n\n".join(jokes)
        else:
            response_text = f"No jokes found for the term '{term}'."
    except Exception as e:
        # Deliberately broad: any failure is reported as text rather than raised.
        response_text = f"Error searching for jokes: {str(e)}"

    # Generate audio using gTTS
    audio_file = speak_text(response_text)

    return response_text, audio_file  # Return text and audio file path


# Aliases under which the @tool-decorated functions are handed to the agent.
lookup_definition_tool = lookup_definition
text_to_speech_tool = text_to_speech
dad_jokes_tool = search_dad_jokes

# Tool instances. DuckDuckGoSearchTool is imported twice at the top of the file;
# the later import (tools.web_search) is the one bound here.
final_answer = FinalAnswerTool()
web_search_tool = DuckDuckGoSearchTool()
visit_webpage_tool = VisitWebpageTool()



# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud' 

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)


# Import tool from Hub
# NOTE(review): trust_remote_code=True presumably lets code from the Hub repo run
# locally -- confirm the repository is trusted.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's default locale encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[visit_webpage_tool, web_search_tool, final_answer, image_generation_tool, lookup_definition_tool, text_to_speech_tool, dad_jokes_tool], ## add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
    )

#Gradio interface with text and audio output
def gradio_search_jokes(word):
    """Gradio callback: look up dad jokes for ``word`` via search_dad_jokes.

    Returns:
        A (text, audio file path) pair, in that order.
    """
    # Unpacking requires search_dad_jokes to hand back exactly two values.
    joke_text, audio_path = search_dad_jokes(word)
    return joke_text, audio_path



# Define the Gradio UI: a word goes in, the matching jokes come back as text and audio.
with gr.Blocks() as demo:
    gr.Markdown("### Dad Jokes Finder with AI & Text-to-Speech 🎙️")

    # The input box and both outputs are declared inside a single row.
    with gr.Row():
        input_box = gr.Textbox(label="Enter a word")
        output_text = gr.Textbox(label="Jokes Found")
        output_audio = gr.Audio(label="Audio Pronunciation", type="filepath")

    btn = gr.Button("Get Jokes")
    # gradio_search_jokes returns (text, audio path), matching the outputs order.
    btn.click(
        fn=gradio_search_jokes,
        inputs=input_box,
        outputs=[output_text, output_audio],
    )

demo.launch()


# GradioUI(agent).launch()