xidu committed on
Commit
9a13ef7
·
1 Parent(s): 708fb30

deploy: Launch ap2 service on port 7861

Browse files
Files changed (4) hide show
  1. Dockerfile +15 -0
  2. README.md +9 -5
  3. app.py +387 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY ./requirements.txt /app/requirements.txt
6
+ RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
7
+
8
+ COPY ./app.py /app/
9
+
10
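+ # Hugging Face Spaces routes external traffic to port 7860 by default.
11
+ # This image listens on 7861 instead, so the Space's `app_port` setting (see README.md) must be 7861.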
12
+ EXPOSE 7861
13
+
14
+ # 直接运行 uvicorn,监听 7861 端口
15
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7861"]
README.md CHANGED
@@ -1,10 +1,14 @@
1
  ---
2
- title: Ap2
3
- emoji: 😻
4
  colorFrom: purple
5
- colorTo: purple
6
  sdk: docker
7
- pinned: false
8
  ---
 
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
  ---
2
+ title: GenAI API Service (ap2)
3
+ emoji: 🚀
4
  colorFrom: purple
5
+ colorTo: pink
6
  sdk: docker
7
+ app_port: 7861
8
  ---
9
+ This is a professional-grade FastAPI application, served on port 7861, that proxies chat requests to the Google GenAI API.
10
 
11
+ - **`POST /v1/chat/completions`**: Main endpoint for chat, supports streaming.
12
+ - **`GET /v1/models`**: Lists available models.
13
+ - **`GET /health`**: Health check.
14
+ - **`GET /`**: API Info.
app.py ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import json
3
+ import time
4
+ import asyncio
5
+ import os
6
+ import traceback
7
+ import sys
8
+ from contextlib import asynccontextmanager
9
+ import random
10
+
11
+ import uvicorn
12
+ from fastapi import FastAPI, Request, HTTPException
13
+ from fastapi.responses import StreamingResponse
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from fastapi.responses import JSONResponse
16
+ from google import genai
17
+ from google.genai import types
18
+ from typing import Optional, List, Dict, Any
19
+
20
# Minimal logging setup: INFO level, "timestamp [LEVEL]: message" lines.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s]: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
# Module-level logger used by every handler in this file.
logger = logging.getLogger(__name__)
27
+
28
# Model configuration: maps a client-requested model id to the Gemini model
# id that is actually called. Every entry currently maps to itself.
GEMINI_MODELS = {
    model_id: model_id
    for model_id in (
        "gemini-2.0-flash-exp",
        "gemini-2.5-flash-preview-05-20",
        "gemini-2.5-flash",
        "gemini-2.5-flash-preview-04-17",
    )
}
35
+
36
# Supported-model catalogue returned by the model-listing endpoints.
# Each entry is an OpenAI-style "model" object; "created" is stamped with the
# time this module is imported.
SUPPORTED_MODELS = [
    {
        "id": model_id,
        "object": "model",
        "created": int(time.time()),
        "owned_by": "google",
        "permission": [],
        "root": model_id,
        "parent": None,
        "description": description,
    }
    for model_id, description in (
        ("gemini-2.5-flash-preview-05-20", "Gemini 2.5 Flash Preview - 最新实验性模型"),
        ("gemini-2.5-flash-preview-04-17", "gemini-2.5-flash-preview-04-17- 经典专业模型"),
        ("gemini-2.5-flash", "gemini-2.5-flash稳定经典专业模型"),
    )
]
69
+
70
+
71
def get_model_name(requested_model: str) -> str:
    """Resolve a client-requested model id to the Gemini model id to call.

    Ids that are not keys of ``GEMINI_MODELS`` fall back to
    ``"gemini-2.5-flash"``.
    """
    if requested_model in GEMINI_MODELS:
        return GEMINI_MODELS[requested_model]
    return "gemini-2.5-flash"
74
+
75
+
76
def convert_messages(messages):
    """Convert OpenAI-style chat messages into Gemini contents.

    Args:
        messages: Iterable of dicts with ``role`` and ``content`` keys.
            ``content`` may be a string, ``None``, or a list of content parts
            (strings or dicts carrying a ``"text"`` key). ``None`` is treated
            as an empty conversation.

    Returns:
        A ``(content_parts, system_instruction)`` tuple. ``content_parts`` is
        a list of ``{"role": ..., "parts": [{"text": ...}]}`` dicts in which
        the ``assistant`` role is renamed to ``model``. ``system_instruction``
        is the text of all system messages joined by blank lines, or ``None``
        when there is no system message.

    Messages whose role is not ``system``, ``assistant`` or ``user`` are
    skipped. A message without a ``role`` key is treated as a user message.
    """

    def as_text(content):
        # Normalise every accepted content shape to one plain string so the
        # ``text`` field is never None or a list.
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
                # Non-text parts (e.g. images) are dropped: only text is forwarded.
            return "\n".join(pieces)
        return str(content)

    role_map = {"assistant": "model", "user": "user"}
    content_parts = []
    system_texts = []

    for message in messages or []:
        role = message.get("role", "user")
        text = as_text(message.get("content", ""))

        if role == "system":
            # Keep every system message instead of letting the last one win.
            system_texts.append(text)
        elif role in role_map:
            content_parts.append({
                "role": role_map[role],
                "parts": [{"text": text}],
            })

    system_instruction = "\n\n".join(system_texts) if system_texts else None
    return content_parts, system_instruction
98
+
99
+
100
def handle_error(error):
    """Map an upstream error to a user-facing message and a finish reason.

    The decision is made by case-insensitive substring matching on
    ``str(error)``.

    Returns:
        A ``(message, finish_reason)`` tuple where ``finish_reason`` is
        ``"length"``, ``"content_filter"`` or ``"stop"``.
    """
    text = str(error).lower()
    mentions_safety = "safety" in text

    if "prompt_feedback" not in text:
        if mentions_safety:
            return "您的请求被安全策略过滤。请尝试修改您的问题。", "content_filter"
    elif "other" in text:
        return "您的输入内容可能过长或触发了安全策略。请尝试缩短您的问题。", "length"
    elif mentions_safety:
        return "您的请求被安全策略阻止。请尝试修改您的问题。", "content_filter"

    # Anything unrecognised gets the generic message.
    return "生成内容时遇到错误。请稍后重试。", "stop"
113
+
114
+
115
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: build a Gemini client at startup, log on shutdown.

    Any exception raised inside the ``try`` body is logged as a startup
    failure and re-raised. The shutdown message is always logged.
    """
    try:
        # Builds a client from a randomly chosen key. The result is discarded,
        # so this only checks that client construction does not raise.
        setup_gemini()
        logger.info("应用启动完成")
        yield
    except Exception as startup_error:
        logger.error(f"应用启动失败: {startup_error!s}")
        raise
    finally:
        logger.info("应用关闭")
126
+
127
+
128
# Create the FastAPI application instance, wired to the lifespan hook.
app = FastAPI(
    title="Gemini Official API (ap2)",
    version="1.3.0",
    lifespan=lifespan,
)

# Add CORS middleware. NOTE(review): every origin, method and header is
# allowed and credentials are enabled; confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
143
+
144
def load_api_keys(raw):
    """Parse a comma-separated string of API keys into a list.

    Surrounding whitespace is stripped and empty entries are dropped.
    ``None`` or an empty string yields an empty list.
    """
    return [key.strip() for key in (raw or "").split(",") if key.strip()]


# Gemini API keys. They are read from the environment so that credentials
# never live in source control: set GEMINI_API_KEYS to a comma-separated list
# (or GEMINI_API_KEY to a single key), e.g. as a Space secret.
# SECURITY: any key that was ever committed to this repository must be
# considered leaked and should be revoked.
API_KEYS = load_api_keys(
    os.environ.get("GEMINI_API_KEYS") or os.environ.get("GEMINI_API_KEY")
)
if not API_KEYS:
    logging.getLogger(__name__).warning(
        "No Gemini API key configured; set the GEMINI_API_KEYS environment variable"
    )
149
+
150
+
151
def get_random_api_key():
    """Return one entry of ``API_KEYS`` chosen at random."""
    chosen_key = random.choice(API_KEYS)
    return chosen_key
154
+
155
+
156
def setup_gemini(api_key=None):
    """Create a Gemini client.

    Args:
        api_key: Key to use. When falsy, a random key from ``API_KEYS`` is
            chosen.

    Returns:
        A ``(client, api_key)`` tuple with the client and the key it uses.

    Raises:
        ValueError: If ``API_KEYS`` is empty.
    """
    # Validate before picking a key: choosing from an empty list would raise
    # IndexError and hide the intended, descriptive error below.
    if not API_KEYS:
        logger.error("请设置有效的API密钥列表")
        raise ValueError("API_KEYS未设置")

    if not api_key:
        api_key = get_random_api_key()

    client = genai.Client(api_key=api_key)
    return client, api_key
167
+
168
+
169
# Safety settings sent with every request: each listed harm category is
# configured with the BLOCK_NONE threshold.
SAFETY_SETTINGS = [
    types.SafetySetting(
        category=harm_category,
        threshold=types.HarmBlockThreshold.BLOCK_NONE,
    )
    for harm_category in (
        types.HarmCategory.HARM_CATEGORY_HARASSMENT,
        types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        types.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
    )
]
192
+
193
+
194
async def try_generate_content(model_name, content_parts, config, max_retries=3):
    """Generate content (non-streaming) with retries and API-key rotation.

    Each attempt picks a random key that has not been used yet in this call;
    once every key has been tried the pool is reset. An error is retried only
    when its text contains one of the listed HTTP status codes as a substring;
    any other error is re-raised immediately.

    Args:
        model_name: Gemini model id to call.
        content_parts: Conversation contents for the request.
        config: Generation config for the request.
        max_retries: Maximum number of attempts (must be at least 1).

    Returns:
        A ``(response, api_key)`` tuple with the SDK response and the key
        that produced it.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        Exception: The last upstream error once all attempts have failed, or
            the first non-retryable error.
    """
    if max_retries < 1:
        # Without this guard the final ``raise`` would try to raise None.
        raise ValueError("max_retries must be at least 1")

    retryable_codes = ('400', '401', '403', '429', '500', '502', '503', '504')
    last_error = None
    used_keys = set()

    for attempt in range(max_retries):
        try:
            available_keys = [key for key in API_KEYS if key not in used_keys]
            if not available_keys:
                used_keys.clear()
                available_keys = list(API_KEYS)

            api_key = random.choice(available_keys)
            used_keys.add(api_key)

            client, current_key = setup_gemini(api_key)
            # Log only the key's last four characters to keep secrets out of logs.
            logger.info(f"尝试第 {attempt + 1} 次,使用密钥: ...{current_key[-4:]}")

            # Use the SDK's async client so the event loop is not blocked
            # while waiting for the upstream response.
            response = await client.aio.models.generate_content(
                model=model_name,
                contents=content_parts,
                config=config,
            )
            return response, current_key

        except Exception as e:
            last_error = e
            error_str = str(e).lower()

            if not any(code in error_str for code in retryable_codes):
                raise

            logger.warning(f"第 {attempt + 1} 次尝试失败: {str(e)}")
            if attempt < max_retries - 1:
                await asyncio.sleep(1)

    raise last_error
233
+
234
+
235
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-style chat completion endpoint backed by Gemini.

    Reads ``messages``, ``stream``, ``max_tokens``, ``temperature``, ``top_p``
    and ``model`` from the JSON body. With ``stream`` set, returns a
    server-sent-event stream; otherwise returns a ``chat.completion`` object.

    Raises:
        HTTPException: 400 when the body is not a JSON object or contains no
            user/assistant message; 500 when generation fails.
    """
    try:
        try:
            body = await request.json()
        except ValueError as e:
            # Malformed JSON is a client error, not a server error.
            raise HTTPException(status_code=400, detail="Request body must be valid JSON") from e
        if not isinstance(body, dict):
            raise HTTPException(status_code=400, detail="Request body must be a JSON object")

        messages = body.get('messages', [])
        stream = body.get('stream', False)
        max_tokens = body.get('max_tokens', 65536)
        temperature = body.get('temperature', 1.2)
        top_p = body.get('top_p', 0.0)
        requested_model = body.get('model', 'gemini-2.5-flash')

        model_name = get_model_name(requested_model)
        content_parts, system_instruction = convert_messages(messages)
        if not content_parts:
            # Fail fast instead of sending an empty conversation upstream.
            raise HTTPException(
                status_code=400,
                detail="messages must contain at least one user or assistant message",
            )

        config = types.GenerateContentConfig(
            max_output_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            system_instruction=system_instruction,
            safety_settings=SAFETY_SETTINGS,
        )

        if stream:
            # The streaming generator picks its own key per attempt; the
            # client is passed only to satisfy its signature.
            client, _ = setup_gemini()
            return StreamingResponse(
                stream_response_with_retry(client, model_name, content_parts, config),
                media_type='text/event-stream'
            )

        response, used_key = await try_generate_content(model_name, content_parts, config)
        response_text = response.text if response else ""
        if not response_text:
            response_text = "无法生成回复。请尝试修改您的问题。"

        # Log only the key's last four characters to keep secrets out of logs.
        logger.info(f"成功生成回复,使用密钥: ...{used_key[-4:]}")

        # Rough usage figures: message count and whitespace-separated words,
        # not real token counts.
        prompt_tokens = len(content_parts)
        completion_tokens = len(response_text.split())
        return {
            'id': f'chatcmpl-{int(time.time())}-{random.randint(1000, 9999)}',
            'object': 'chat.completion',
            'created': int(time.time()),
            'model': requested_model,
            'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': response_text}, 'finish_reason': 'stop'}],
            'usage': {'prompt_tokens': prompt_tokens, 'completion_tokens': completion_tokens, 'total_tokens': prompt_tokens + completion_tokens}
        }
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400s above) pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"处理聊天请求出错: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
284
+
285
+
286
async def stream_response_with_retry(client, model_name, content_parts, config, max_retries=3):
    """Yield an OpenAI-style SSE stream for a Gemini request, with retries.

    Each attempt picks a random key not yet used in this call (the pool is
    reset once exhausted) and builds its own client. An attempt is retried
    only if it failed before any content was sent and the error text contains
    one of the listed HTTP status codes as a substring. Retrying after partial
    output would replay the answer from the start and duplicate text on the
    client. When no attempt succeeds, a final chunk carrying a user-facing
    error message is emitted instead.

    Args:
        client: Unused; kept for interface compatibility with callers.
        model_name: Gemini model id, also reported in each chunk.
        content_parts: Conversation contents for the request.
        config: Generation config for the request.
        max_retries: Maximum number of attempts.

    Yields:
        ``data: ...`` SSE lines, always terminated by ``data: [DONE]``.
    """
    retryable_codes = ('400', '401', '403', '429', '500', '502', '503', '504')
    # One id for the whole stream so clients can correlate its chunks.
    completion_id = f'chatcmpl-{int(time.time())}-{random.randint(1000, 9999)}'

    def sse_chunk(delta, finish_reason=None):
        # Serialise one chat.completion.chunk payload as an SSE data line.
        payload = {'id': completion_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': model_name, 'choices': [{'index': 0, 'delta': delta, 'finish_reason': finish_reason}]}
        return f'data: {json.dumps(payload, ensure_ascii=False)}\n\n'

    last_error = None
    used_keys = set()
    for attempt in range(max_retries):
        content_sent = False
        try:
            available_keys = [key for key in API_KEYS if key not in used_keys]
            if not available_keys:
                used_keys.clear()
                available_keys = list(API_KEYS)
            api_key = random.choice(available_keys)
            used_keys.add(api_key)

            current_client, current_key = setup_gemini(api_key)
            # Log only the key's last four characters to keep secrets out of logs.
            logger.info(f"流式响应尝试第 {attempt + 1} 次,使用密钥: ...{current_key[-4:]}")

            # Use the SDK's async stream so the event loop is not blocked
            # between chunks.
            upstream = await current_client.aio.models.generate_content_stream(model=model_name, contents=content_parts, config=config)
            async for chunk in upstream:
                if chunk and hasattr(chunk, 'text') and chunk.text:
                    content_sent = True
                    yield sse_chunk({'role': 'assistant', 'content': chunk.text})

            yield sse_chunk({}, 'stop')
            yield 'data: [DONE]\n\n'

            logger.info(f"流式响应成功,使用密钥: ...{current_key[-4:]}")
            return
        except Exception as e:
            last_error = e
            if content_sent:
                # Part of the answer already reached the client; do not replay it.
                break
            error_str = str(e).lower()
            if not any(code in error_str for code in retryable_codes):
                break
            logger.warning(f"流式响应第 {attempt + 1} 次尝试失败: {str(e)}")
            if attempt < max_retries - 1:
                await asyncio.sleep(1)

    logger.error(f"流式响应所有重试失败: {str(last_error)}")
    error_message, finish_reason = handle_error(last_error)
    yield sse_chunk({'role': 'assistant', 'content': error_message}, finish_reason)
    yield 'data: [DONE]\n\n'
329
+
330
@app.get("/v1/models")
async def list_models():
    """Return the supported models as an OpenAI-style list object."""
    try:
        listing = {"object": "list", "data": SUPPORTED_MODELS}
    except Exception as err:
        logger.error(f"获取模型列表出错: {err!s}")
        raise HTTPException(status_code=500, detail=str(err))
    return listing
338
+
339
@app.get("/v1/models/{model_id}")
async def get_model_info(model_id: str):
    """Return the catalogue entry whose ``id`` equals ``model_id``.

    Raises:
        HTTPException: 404 when no supported model has that id; 500 on any
            other failure.
    """
    try:
        found = next(
            (entry for entry in SUPPORTED_MODELS if entry["id"] == model_id),
            None,
        )
        if found is None:
            raise HTTPException(status_code=404, detail=f"模型 {model_id} 未找到")
        return found
    except HTTPException:
        # Let the deliberate 404 through untouched.
        raise
    except Exception as err:
        logger.error(f"获取模型信息出错: {err!s}")
        raise HTTPException(status_code=500, detail=str(err))
352
+
353
@app.get("/v1/chat/completions/v1/models")
async def list_models_alternative():
    """Return the supported models list under a compatibility path."""
    try:
        return dict(object="list", data=SUPPORTED_MODELS)
    except Exception as err:
        logger.error(f"获取模型列表出错: {err!s}")
        raise HTTPException(status_code=500, detail=str(err))
361
+
362
@app.get("/health")
async def health_check():
    """Health-check endpoint: report status, current time and model ids.

    If building the report fails, an ``"unhealthy"`` payload carrying the
    error text is returned instead of raising.
    """
    try:
        model_ids = [entry["id"] for entry in SUPPORTED_MODELS]
        report = {
            "status": "healthy",
            "timestamp": int(time.time()),
            "api": "gemini-official",
            "available_models": model_ids,
            "version": "1.3.0",
        }
        return report
    except Exception as err:
        logger.error(f"健康检查失败: {err!s}")
        return {"status": "unhealthy", "timestamp": int(time.time()), "error": str(err)}
370
+
371
@app.get("/")
async def root():
    """Root path: return the service name, version and endpoint map."""
    endpoint_map = {
        "models": "/v1/models",
        "models_alt": "/v1/chat/completions/v1/models",
        "chat": "/v1/chat/completions",
        "health": "/health",
    }
    return {
        "name": "Gemini Official API (ap2)",
        "version": "1.3.0",
        "description": "Google Gemini官方API接口服务",
        "endpoints": endpoint_map,
    }
375
+
376
@app.exception_handler(404)
async def not_found_handler(request: Request, exc: HTTPException):
    """Handle 404 errors with a JSON body listing the available endpoints.

    An exception handler must return a ``Response`` object; a bare dict is
    not converted automatically, so the payload is wrapped in a
    ``JSONResponse`` that keeps the 404 status.
    """
    return JSONResponse(
        status_code=404,
        content={"error": "未找到", "requested_path": str(request.url.path), "message": "请求的路径不存在", "available_endpoints": {"models": "/v1/models", "models_alt": "/v1/chat/completions/v1/models", "chat": "/v1/chat/completions", "health": "/health", "info": "/"}},
    )
380
+
381
if __name__ == "__main__":
    # Script entry point: the port comes from the PORT environment variable,
    # defaulting to 7861.
    port = int(os.environ.get("PORT", 7861))
    startup_banner = (
        f"🚀 启动Gemini官方API服务器于端口 {port}",
        f"📊 支持的模型: {[model['id'] for model in SUPPORTED_MODELS]}",
        f"🔑 已配置 {len(API_KEYS)} 个API密钥",
        "🔄 支持自动重试和密钥轮换",
    )
    for banner_line in startup_banner:
        print(banner_line)
    uvicorn.run(app, host="0.0.0.0", port=port)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ google-genai
4
+ loguru
5
+ httpx