kevinkal committed on
Commit e90ba75 · verified · 1 Parent(s): b5053e4

Update app.py with Gemini multimodal

Files changed (1)
  1. app.py +23 -1
app.py CHANGED
@@ -4,6 +4,7 @@ from pydantic import BaseModel
 from typing import Annotated
 from mistralai import Mistral
 from google import genai
+from google.genai import types
 from auth import verify_token
 import os
 
@@ -52,4 +53,25 @@ async def gemini(request: LLMRequest, token: Annotated[str, Depends(verify_token
         if chunk.text:
             yield chunk.text
 
-    return StreamingResponse(generate(), media_type="text/plain")
+    return StreamingResponse(generate(), media_type="text/plain")
+
+class GeminiMultimodalRequest(BaseModel):
+    model: str
+    prompt: str
+    image: str # url or base64
+
+@app.post("/gemini/multimodal")
+async def gemini_multimodal(request: GeminiMultimodalRequest, token: Annotated[str, Depends(verify_token)]):
+    if request.image.startswith('http'):
+        async with httpx.AsyncClient() as client:
+            image = await client.get(request.image)
+            image = types.Part.from_bytes(image.content, "image/jpeg")
+    else:
+        image = types.Part.from_bytes(request.image.encode(), "image/jpeg")
+
+    response = gemini_client.models.generate_content(
+        model=request.model,
+        contents=[request.prompt, image]
+    )
+
+    return {"response": response.text}
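
For reference, a minimal standalone sketch of the new endpoint. It assumes httpx is imported elsewhere in app.py, that gemini_client is a genai.Client built from a GEMINI_API_KEY environment variable (names chosen for illustration), that the base64 branch carries a base64-encoded JPEG that needs decoding, and it passes data and mime_type as keyword arguments to types.Part.from_bytes per the google-genai SDK. Treat it as a sketch under those assumptions, not the committed implementation.

import base64
import os
from typing import Annotated

import httpx
from fastapi import Depends, FastAPI
from pydantic import BaseModel
from google import genai
from google.genai import types

from auth import verify_token  # same token dependency app.py already uses

app = FastAPI()
# Assumed client setup; app.py constructs its own gemini_client earlier in the file.
gemini_client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

class GeminiMultimodalRequest(BaseModel):
    model: str
    prompt: str
    image: str  # URL or base64-encoded JPEG

@app.post("/gemini/multimodal")
async def gemini_multimodal(
    request: GeminiMultimodalRequest,
    token: Annotated[str, Depends(verify_token)],
):
    if request.image.startswith("http"):
        # Download the image bytes from the given URL.
        async with httpx.AsyncClient() as client:
            resp = await client.get(request.image)
            resp.raise_for_status()
        image = types.Part.from_bytes(data=resp.content, mime_type="image/jpeg")
    else:
        # Assume the string is a base64-encoded JPEG and decode it to raw bytes.
        image = types.Part.from_bytes(
            data=base64.b64decode(request.image), mime_type="image/jpeg"
        )

    # Non-streaming generation: prompt text plus the image part.
    response = gemini_client.models.generate_content(
        model=request.model,
        contents=[request.prompt, image],
    )
    return {"response": response.text}

A hypothetical client call against a local instance (the bearer token, host, and model name are placeholders):

import httpx

r = httpx.post(
    "http://localhost:8000/gemini/multimodal",
    headers={"Authorization": "Bearer <token>"},
    json={
        "model": "gemini-2.0-flash",
        "prompt": "Describe this image.",
        "image": "https://example.com/sample.jpg",
    },
)
print(r.json()["response"])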