import os
from fastapi import FastAPI
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI  # LangChain's Gemini chat integration
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint  # Hugging Face chat integration
import uvicorn
import nest_asyncio
# Allows the use of uvicorn.run in environments like Jupyter
nest_asyncio.apply()
# Retrieve API credentials from environment variables (never hard-code keys in source)
hf_token = os.getenv("HF_TOKEN")              # Hugging Face access token
gemini_api_key = os.getenv("GEMINI_API_KEY")  # Gemini API key
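# The variable names above are assumptions; export whatever names you use, e.g.:
#   export HF_TOKEN=...
#   export GEMINI_API_KEY=...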
# Initialize the FastAPI app
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API Server using both Hugging Face and Gemini models",
)
# Initialize the LLaMA model via the Hugging Face Inference API
# (the repo id below is one Llama 3.2 variant; the meta-llama repos are gated,
# so the token must belong to an account that has been granted access)
llama_model = ChatHuggingFace(
    llm=HuggingFaceEndpoint(
        repo_id="meta-llama/Llama-3.2-3B-Instruct",
        task="text-generation",
        huggingfacehub_api_token=hf_token,
    )
)
# Initialize the Gemini model
gemini_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",  # Substitute any Gemini chat model you have access to
    google_api_key=gemini_api_key,
)
# Define a QnA prompt using a template
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")
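# For reference, the template renders the question into a single human message:
#   qna_prompt.format_messages(question="What is FastAPI?")
#   -> [HumanMessage(content="Answer the question: What is FastAPI?")]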
# Function to choose a model based on preference
# You might want to improve this to include more refined selection criteria
def get_model_response(question: str, use_gemini: bool = False) -> str:
    model = gemini_model if use_gemini else llama_model
    # Pipe the prompt into the chosen chat model and extract the text reply
    chain = qna_prompt | model
    return chain.invoke({"question": question}).content
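# Quick local sanity check (assumes valid credentials in the environment):
#   print(get_model_response("What is LangChain?", use_gemini=True))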
# Create an API endpoint
@app.post("/llm_api")
async def qna_endpoint(question: str, use_gemini: bool = False):
    """
    Endpoint to receive a question and return a response from either the
    Hugging Face or Gemini model.
    """
    response = get_model_response(question, use_gemini)
    return {"response": response}
# Run the application
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # Bind to all IPv4 interfaces
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        print(f"An error occurred: {e}")