Update gemini_fastapi_server.py
gemini_fastapi_server.py  +27 -13  CHANGED
@@ -1,7 +1,8 @@
 import os
 from fastapi import FastAPI
 from langchain.prompts import ChatPromptTemplate
-from langchain_gemini import ChatGemini  # …
+from langchain_gemini import ChatGemini  # Assuming there's a Gemini integration
+from langchain import HuggingFaceChat  # Hugging Face integration
 from langserve import add_routes
 import uvicorn
 import nest_asyncio
@@ -9,28 +10,41 @@ import nest_asyncio
 # Allows the use of uvicorn.run in environments like Jupyter
 nest_asyncio.apply()

+hf_token = os.getenv("HF_TOKEN")  # Retrieve HF token
+gemini_api_key = os.getenv("GEMINI_API_KEY")  # Retrieve Gemini API key
+
 # Initialize the FastAPI app
 app = FastAPI(
-    title="…
+    title="Multimodal Language Server",
     version="1.0",
-    description="A simple QnA API Server using …
+    description="A simple QnA API Server using both Hugging Face and Gemini models"
 )

-# Initialize the LLaMA model using …
-…
-…
+# Initialize the LLaMA model using Hugging Face
+llama_model = HuggingFaceChat(model="meta-llama/LLaMA-3-2", token=hf_token)  # Use the specific LLaMA model from HF
+
+# Initialize the Gemini model (adjust based on the actual integration)
+gemini_model = ChatGemini(api_key=gemini_api_key, model="gemini_model_name_here")  # Specify the correct model name

 # Define a QnA prompt using a template
 qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")

+# Function to choose a model based on preference
+# You might want to improve this to include more refined selection criteria
+def get_model_response(question, use_gemini=False):
+    if use_gemini:
+        return gemini_model(question)  # Call the Gemini model
+    else:
+        return llama_model(question)  # Call the Hugging Face model
+
+# Create an API endpoint
 @app.post("/llm_api")
-async def qna_endpoint(question: str):
-    …
-    …
-    …
-    …
-    …
+async def qna_endpoint(question: str, use_gemini: bool = False):
+    """
+    Endpoint to receive a question and return a response from either the Hugging Face or Gemini model.
+    """
+    response = get_model_response(question, use_gemini)
+    return {"response": response}

 # Run the application
 if __name__ == "__main__":
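
The imports added in this commit (langchain_gemini.ChatGemini and langchain.HuggingFaceChat) are flagged as assumptions by the diff's own comments; neither is a published LangChain class, and "meta-llama/LLaMA-3-2" and "gemini_model_name_here" are placeholders. A minimal sketch of the same server written against the published langchain-google-genai and langchain-huggingface packages might look like the following; the model names, the port, and the GOOGLE_API_KEY / HF_TOKEN environment variables are illustrative assumptions, not values taken from the commit.

# Hedged sketch, not the committed code: the same QnA server wired to published
# LangChain integrations instead of the assumed langchain_gemini / HuggingFaceChat names.
# Assumes: pip install fastapi uvicorn nest_asyncio langchain langchain-google-genai langchain-huggingface
# and that GOOGLE_API_KEY and HF_TOKEN are set in the environment.
import os

import nest_asyncio
import uvicorn
from fastapi import FastAPI
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI               # Gemini chat model
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint  # Hugging Face chat model

nest_asyncio.apply()  # allows uvicorn.run in environments like Jupyter

app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API Server using both Hugging Face and Gemini models",
)

# Gemini model via langchain-google-genai (reads GOOGLE_API_KEY from the environment).
# "gemini-1.5-flash" is an illustrative model name, not taken from the commit.
gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# LLaMA model served through the Hugging Face Inference API.
# "meta-llama/Llama-3.2-3B-Instruct" is likewise an illustrative repo id.
llama_model = ChatHuggingFace(
    llm=HuggingFaceEndpoint(
        repo_id="meta-llama/Llama-3.2-3B-Instruct",
        huggingfacehub_api_token=os.getenv("HF_TOKEN"),
    )
)

# Same QnA prompt as in the diff.
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")


def get_model_response(question: str, use_gemini: bool = False) -> str:
    """Route the question to Gemini or the Hugging Face model and return the answer text."""
    chain = qna_prompt | (gemini_model if use_gemini else llama_model)
    return chain.invoke({"question": question}).content


@app.post("/llm_api")
async def qna_endpoint(question: str, use_gemini: bool = False):
    """Return a response from either the Hugging Face or Gemini model."""
    return {"response": get_model_response(question, use_gemini)}


# Run the application (port 8000 is an assumption; the diff cuts off before uvicorn.run).
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

Note that add_routes from langserve is imported in the diff but never called; it would only be needed if the prompt/model chains were exposed as LangServe routes instead of the hand-written /llm_api endpoint, so the sketch above drops it.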
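Because question and use_gemini are declared as plain scalar parameters rather than a Pydantic model or Body field, FastAPI reads them from the query string of the POST request. A quick client-side check could look like the snippet below, assuming the server is running locally on port 8000 (the diff cuts off before the uvicorn.run line, so host and port are assumptions).

# Hypothetical client call for the /llm_api endpoint defined above.
import requests

resp = requests.post(
    "http://localhost:8000/llm_api",
    params={"question": "What is LangServe?", "use_gemini": True},  # query parameters, not a JSON body
)
print(resp.json())  # expected shape: {"response": "..."}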