"""Deploying an AI Voice Chatbot Gradio App."""
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool

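# Text-generation model served through the Hugging Face Inference API (smolagents' default when no model id is passed).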
model = HfApiModel()

search_tool = DuckDuckGoSearchTool()

visit_webpage_tool = VisitWebpageTool()


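# Code-writing agent that can search the web and read pages; the extra imports
# listed here are whitelisted so the code it generates is allowed to use them.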
agent = CodeAgent(
    tools=[search_tool, visit_webpage_tool],
    model=model,
    additional_authorized_imports=['requests', 'bs4', 'pandas', 'gradio', 'concurrent.futures', 'csv', 'json']
)

import gradio as gr
from typing import Tuple

from utils import (
    TextGenerationPipeline,
    from_en_translation,
    html_audio_autoplay,
    stt,
    to_en_translation,
    tts,
    tts_to_bytesio,
)

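# Chatbot settings: maximum answer length and the language the user speaks (German).
# The local text-generation pipeline is instantiated here, although the answers are
# produced by the agent in main() below.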
max_answer_length = 100
desired_language = "de"
response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)


def main(audio: object) -> Tuple[str, str, str, object]:
    """Runs one voice-chatbot turn for the Gradio app.

    It takes the user's recorded speech and responds both verbally
    and in text.

    Args:
        audio (object): Recorded speech of the user.

    Returns:
        tuple containing:
        - user_speech_text (str): Recognized speech.
        - bot_response_de (str): Translated answer of the bot.
        - bot_response_en (str): Bot's original answer.
        - html (object): Autoplayer for bot's speech.
    """
    # Transcribe the user's speech, then translate it to English for the agent.
    user_speech_text = stt(audio, desired_language)
    translated_text = to_en_translation(user_speech_text, desired_language)

    # Ask the web-search agent for an answer to the (English) query.
    bot_response_en = agent.run(translated_text)
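    # Translate the answer back to the user's language and synthesize speech.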
    bot_response_de = from_en_translation(bot_response_en, desired_language)
    bot_voice = tts(bot_response_de, desired_language)
    bot_voice_bytes = tts_to_bytesio(bot_voice)
    html = html_audio_autoplay(bot_voice_bytes)
    return user_speech_text, bot_response_de, bot_response_en, html


# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## AI Voice Chatbot")
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Speak or Upload Audio")
        submit_btn = gr.Button("Submit")
    with gr.Row():
        user_speech_text = gr.Textbox(label="You said:", interactive=False)
        bot_response_de = gr.Textbox(label="AI said (in German):", interactive=False)
        bot_response_en = gr.Textbox(label="AI said (in English):", interactive=False)
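    # HTML output used to autoplay the bot's synthesized voice in the browser.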
    html_output = gr.HTML()

    # Connect the function to the components
    submit_btn.click(
        fn=main,
        inputs=[audio_input],
        outputs=[user_speech_text, bot_response_de, bot_response_en, html_output],
    )

# Launch the Gradio app
demo.launch(debug=True)