Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,6 @@ from fastrtc import (
|
|
17 |
wait_for_item,
|
18 |
)
|
19 |
from google import genai
|
20 |
-
from google.genai import types
|
21 |
from google.genai.types import (
|
22 |
LiveConnectConfig,
|
23 |
PrebuiltVoiceConfig,
|
@@ -30,9 +29,12 @@ from google.genai.types import (
|
|
30 |
)
|
31 |
from gradio.utils import get_space
|
32 |
from pydantic import BaseModel
|
|
|
33 |
|
34 |
-
|
|
|
35 |
|
|
|
36 |
load_dotenv()
|
37 |
|
38 |
|
@@ -41,6 +43,20 @@ def encode_audio(data: np.ndarray) -> str:
|
|
41 |
return base64.b64encode(data.tobytes()).decode("UTF-8")
|
42 |
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
class GeminiHandler(AsyncStreamHandler):
|
45 |
"""Handler for the Gemini API"""
|
46 |
|
@@ -69,26 +85,26 @@ class GeminiHandler(AsyncStreamHandler):
|
|
69 |
await self.wait_for_args()
|
70 |
api_key, voice_name, system_message = self.latest_args[1:]
|
71 |
else:
|
72 |
-
api_key, voice_name, system_message = None, "Kore", "
|
|
|
|
|
|
|
73 |
|
74 |
client = genai.Client(
|
75 |
api_key=api_key or os.getenv("GEMINI_API_KEY"),
|
76 |
http_options={"api_version": "v1alpha"},
|
77 |
)
|
78 |
|
79 |
-
|
80 |
-
tools = [
|
81 |
-
Tool(google_search=GoogleSearch()),
|
82 |
-
]
|
83 |
system_instruction = Content(
|
84 |
parts=[Part.from_text(text=f"{system_message}")],
|
85 |
role="user"
|
86 |
)
|
87 |
|
88 |
config = LiveConnectConfig(
|
89 |
-
response_modalities=["AUDIO"],
|
90 |
speech_config=SpeechConfig(
|
91 |
-
language_code=
|
92 |
voice_config=VoiceConfig(
|
93 |
prebuilt_voice_config=PrebuiltVoiceConfig(
|
94 |
voice_name=voice_name,
|
@@ -98,7 +114,7 @@ class GeminiHandler(AsyncStreamHandler):
|
|
98 |
tools=tools,
|
99 |
system_instruction=system_instruction,
|
100 |
)
|
101 |
-
|
102 |
async with client.aio.live.connect(
|
103 |
model="gemini-2.0-flash-exp", config=config
|
104 |
) as session:
|
@@ -152,12 +168,12 @@ stream = Stream(
|
|
152 |
"Fenrir",
|
153 |
"Aoede",
|
154 |
],
|
155 |
-
value="Kore",
|
156 |
),
|
157 |
gr.Textbox(
|
158 |
label="System Message",
|
159 |
placeholder="Enter system instructions for the AI...",
|
160 |
-
value="
|
161 |
lines=3,
|
162 |
),
|
163 |
],
|
@@ -172,13 +188,17 @@ class InputData(BaseModel):
|
|
172 |
|
173 |
|
174 |
app = FastAPI()
|
175 |
-
|
176 |
stream.mount(app)
|
177 |
|
178 |
|
179 |
@app.post("/input_hook")
|
180 |
async def _(body: InputData):
|
181 |
-
stream.set_input(
|
|
|
|
|
|
|
|
|
|
|
182 |
return {"status": "ok"}
|
183 |
|
184 |
|
@@ -199,5 +219,4 @@ if __name__ == "__main__":
|
|
199 |
stream.fastphone(host="0.0.0.0", port=7860)
|
200 |
else:
|
201 |
import uvicorn
|
202 |
-
|
203 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
17 |
wait_for_item,
|
18 |
)
|
19 |
from google import genai
|
|
|
20 |
from google.genai.types import (
|
21 |
LiveConnectConfig,
|
22 |
PrebuiltVoiceConfig,
|
|
|
29 |
)
|
30 |
from gradio.utils import get_space
|
31 |
from pydantic import BaseModel
|
32 |
+
from langdetect import detect, DetectorFactory
|
33 |
|
34 |
+
# Make langdetect results consistent
|
35 |
+
DetectorFactory.seed = 0
|
36 |
|
37 |
+
current_dir = pathlib.Path(__file__).parent
|
38 |
load_dotenv()
|
39 |
|
40 |
|
|
|
43 |
return base64.b64encode(data.tobytes()).decode("UTF-8")
|
44 |
|
45 |
|
46 |
+
def detect_language_code(text: str) -> str:
    """Return the speech language code to use for *text*.

    The text is classified with ``langdetect.detect``. Only Spanish is
    distinguished: a detected tag starting with ``"es"`` yields ``"es-ES"``.
    Every other outcome (English, any other language, blank input, or a
    detection failure) yields the default ``"en-US"``.

    Args:
        text: The text to classify (the system message in this app).

    Returns:
        ``"es-ES"`` for Spanish text, otherwise ``"en-US"``.
    """
    default_code = "en-US"

    # Blank or non-string input has nothing to classify; answer directly
    # instead of relying on the detector raising and being swallowed below.
    if not isinstance(text, str) or not text.strip():
        return default_code

    try:
        lang = detect(text)
        if lang.startswith("es"):
            return "es-ES"
    except Exception:
        # Deliberate best-effort: language detection must never prevent the
        # session from starting, so any detector error means the default.
        return default_code

    # English and every unrecognised language share the same default.
    return default_code
|
58 |
+
|
59 |
+
|
60 |
class GeminiHandler(AsyncStreamHandler):
|
61 |
"""Handler for the Gemini API"""
|
62 |
|
|
|
85 |
await self.wait_for_args()
|
86 |
api_key, voice_name, system_message = self.latest_args[1:]
|
87 |
else:
|
88 |
+
api_key, voice_name, system_message = None, "Kore", "You are a helpful assistant."
|
89 |
+
|
90 |
+
# Auto-detect language from system_message
|
91 |
+
lang_code = detect_language_code(system_message)
|
92 |
|
93 |
client = genai.Client(
|
94 |
api_key=api_key or os.getenv("GEMINI_API_KEY"),
|
95 |
http_options={"api_version": "v1alpha"},
|
96 |
)
|
97 |
|
98 |
+
tools = [Tool(google_search=GoogleSearch())]
|
|
|
|
|
|
|
99 |
system_instruction = Content(
|
100 |
parts=[Part.from_text(text=f"{system_message}")],
|
101 |
role="user"
|
102 |
)
|
103 |
|
104 |
config = LiveConnectConfig(
|
105 |
+
response_modalities=["AUDIO"],
|
106 |
speech_config=SpeechConfig(
|
107 |
+
language_code=lang_code,
|
108 |
voice_config=VoiceConfig(
|
109 |
prebuilt_voice_config=PrebuiltVoiceConfig(
|
110 |
voice_name=voice_name,
|
|
|
114 |
tools=tools,
|
115 |
system_instruction=system_instruction,
|
116 |
)
|
117 |
+
|
118 |
async with client.aio.live.connect(
|
119 |
model="gemini-2.0-flash-exp", config=config
|
120 |
) as session:
|
|
|
168 |
"Fenrir",
|
169 |
"Aoede",
|
170 |
],
|
171 |
+
value="Kore",
|
172 |
),
|
173 |
gr.Textbox(
|
174 |
label="System Message",
|
175 |
placeholder="Enter system instructions for the AI...",
|
176 |
+
value="You are a helpful assistant who answers questions and helps with tasks.",
|
177 |
lines=3,
|
178 |
),
|
179 |
],
|
|
|
188 |
|
189 |
|
190 |
app = FastAPI()
|
|
|
191 |
stream.mount(app)
|
192 |
|
193 |
|
194 |
@app.post("/input_hook")
async def _(body: InputData):
    """Forward the posted settings to the stream for one WebRTC connection.

    The connection id, API key, voice name and system message from the
    request body are passed, in that order, to ``stream.set_input``.
    Always responds with ``{"status": "ok"}``.
    """
    hook_args = (
        body.webrtc_id,
        body.api_key,
        body.voice_name,
        body.system_message,
    )
    stream.set_input(*hook_args)
    return {"status": "ok"}
|
203 |
|
204 |
|
|
|
219 |
stream.fastphone(host="0.0.0.0", port=7860)
|
220 |
else:
|
221 |
import uvicorn
|
222 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|