TakakuwaLui committed on
Commit
63115d0
·
verified ·
1 Parent(s): 7f008b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -16
app.py CHANGED
@@ -17,7 +17,6 @@ from fastrtc import (
17
  wait_for_item,
18
  )
19
  from google import genai
20
- from google.genai import types
21
  from google.genai.types import (
22
  LiveConnectConfig,
23
  PrebuiltVoiceConfig,
@@ -30,9 +29,12 @@ from google.genai.types import (
30
  )
31
  from gradio.utils import get_space
32
  from pydantic import BaseModel
 
33
 
34
- current_dir = pathlib.Path(__file__).parent
 
35
 
 
36
  load_dotenv()
37
 
38
 
@@ -41,6 +43,20 @@ def encode_audio(data: np.ndarray) -> str:
41
  return base64.b64encode(data.tobytes()).decode("UTF-8")
42
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  class GeminiHandler(AsyncStreamHandler):
45
  """Handler for the Gemini API"""
46
 
@@ -69,26 +85,26 @@ class GeminiHandler(AsyncStreamHandler):
69
  await self.wait_for_args()
70
  api_key, voice_name, system_message = self.latest_args[1:]
71
  else:
72
- api_key, voice_name, system_message = None, "Kore", "Du bist ein hilfsamer Assistent."
 
 
 
73
 
74
  client = genai.Client(
75
  api_key=api_key or os.getenv("GEMINI_API_KEY"),
76
  http_options={"api_version": "v1alpha"},
77
  )
78
 
79
- # Define the tools and system instruction
80
- tools = [
81
- Tool(google_search=GoogleSearch()),
82
- ]
83
  system_instruction = Content(
84
  parts=[Part.from_text(text=f"{system_message}")],
85
  role="user"
86
  )
87
 
88
  config = LiveConnectConfig(
89
- response_modalities=["AUDIO"], # type: ignore
90
  speech_config=SpeechConfig(
91
- language_code="de-DE",
92
  voice_config=VoiceConfig(
93
  prebuilt_voice_config=PrebuiltVoiceConfig(
94
  voice_name=voice_name,
@@ -98,7 +114,7 @@ class GeminiHandler(AsyncStreamHandler):
98
  tools=tools,
99
  system_instruction=system_instruction,
100
  )
101
-
102
  async with client.aio.live.connect(
103
  model="gemini-2.0-flash-exp", config=config
104
  ) as session:
@@ -152,12 +168,12 @@ stream = Stream(
152
  "Fenrir",
153
  "Aoede",
154
  ],
155
- value="Kore", # Changed default to Kore
156
  ),
157
  gr.Textbox(
158
  label="System Message",
159
  placeholder="Enter system instructions for the AI...",
160
- value="Du bist ein hilfsamer Assistent, der Fragen beantwortet und bei verschiedenen Aufgaben hilft. Du kannst bei Bedarf auch im Internet suchen, um aktuelle Informationen zu finden.",
161
  lines=3,
162
  ),
163
  ],
@@ -172,13 +188,17 @@ class InputData(BaseModel):
172
 
173
 
174
  app = FastAPI()
175
-
176
  stream.mount(app)
177
 
178
 
179
  @app.post("/input_hook")
180
  async def _(body: InputData):
181
- stream.set_input(body.webrtc_id, body.api_key, body.voice_name, body.system_message)
 
 
 
 
 
182
  return {"status": "ok"}
183
 
184
 
@@ -199,5 +219,4 @@ if __name__ == "__main__":
199
  stream.fastphone(host="0.0.0.0", port=7860)
200
  else:
201
  import uvicorn
202
-
203
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
17
  wait_for_item,
18
  )
19
  from google import genai
 
20
  from google.genai.types import (
21
  LiveConnectConfig,
22
  PrebuiltVoiceConfig,
 
29
  )
30
  from gradio.utils import get_space
31
  from pydantic import BaseModel
32
+ from langdetect import detect, DetectorFactory
33
 
34
+ # Make langdetect results consistent
35
+ DetectorFactory.seed = 0
36
 
37
+ current_dir = pathlib.Path(__file__).parent
38
  load_dotenv()
39
 
40
 
 
43
  return base64.b64encode(data.tobytes()).decode("UTF-8")
44
 
45
 
46
def detect_language_code(text: str) -> str:
    """Map the detected language of *text* to a speech language code.

    Returns ``"es-ES"`` when the language code reported by ``detect``
    starts with ``"es"``. Every other outcome -- English, any other
    language, or any exception raised while detecting -- yields
    ``"en-US"``.
    """
    fallback_code = "en-US"
    try:
        is_spanish = detect(text).startswith("es")
    except Exception:
        # Deliberate best-effort: a failed detection falls back to English
        # instead of propagating the error to the caller.
        return fallback_code
    return "es-ES" if is_spanish else fallback_code
58
+
59
+
60
  class GeminiHandler(AsyncStreamHandler):
61
  """Handler for the Gemini API"""
62
 
 
85
  await self.wait_for_args()
86
  api_key, voice_name, system_message = self.latest_args[1:]
87
  else:
88
+ api_key, voice_name, system_message = None, "Kore", "You are a helpful assistant."
89
+
90
+ # Auto-detect language from system_message
91
+ lang_code = detect_language_code(system_message)
92
 
93
  client = genai.Client(
94
  api_key=api_key or os.getenv("GEMINI_API_KEY"),
95
  http_options={"api_version": "v1alpha"},
96
  )
97
 
98
+ tools = [Tool(google_search=GoogleSearch())]
 
 
 
99
  system_instruction = Content(
100
  parts=[Part.from_text(text=f"{system_message}")],
101
  role="user"
102
  )
103
 
104
  config = LiveConnectConfig(
105
+ response_modalities=["AUDIO"],
106
  speech_config=SpeechConfig(
107
+ language_code=lang_code,
108
  voice_config=VoiceConfig(
109
  prebuilt_voice_config=PrebuiltVoiceConfig(
110
  voice_name=voice_name,
 
114
  tools=tools,
115
  system_instruction=system_instruction,
116
  )
117
+
118
  async with client.aio.live.connect(
119
  model="gemini-2.0-flash-exp", config=config
120
  ) as session:
 
168
  "Fenrir",
169
  "Aoede",
170
  ],
171
+ value="Kore",
172
  ),
173
  gr.Textbox(
174
  label="System Message",
175
  placeholder="Enter system instructions for the AI...",
176
+ value="You are a helpful assistant who answers questions and helps with tasks.",
177
  lines=3,
178
  ),
179
  ],
 
188
 
189
 
190
  app = FastAPI()
 
191
  stream.mount(app)
192
 
193
 
194
  @app.post("/input_hook")
195
  async def _(body: InputData):
196
+ stream.set_input(
197
+ body.webrtc_id,
198
+ body.api_key,
199
+ body.voice_name,
200
+ body.system_message,
201
+ )
202
  return {"status": "ok"}
203
 
204
 
 
219
  stream.fastphone(host="0.0.0.0", port=7860)
220
  else:
221
  import uvicorn
222
+ uvicorn.run(app, host="0.0.0.0", port=7860)