# Multimodal QnA API server (FastAPI + LangServe) exposing Hugging Face LLaMA and Google Gemini chat models.
# Standard library
import os

# Third-party
import nest_asyncio
import uvicorn
from fastapi import FastAPI
from langchain import HuggingFaceChat  # Hugging Face chat-model integration
from langchain.prompts import ChatPromptTemplate
from langchain_gemini import ChatGemini  # Assuming there's a Gemini integration
from langserve import add_routes
# NOTE(review): add_routes is imported but never called below — either wire the
# chains with add_routes(app, ...) or drop this import once confirmed unused.

# Allows re-entrant event loops so uvicorn.run works in environments like Jupyter
nest_asyncio.apply()
# --- Credentials ---------------------------------------------------------
# Secrets must come from the environment; never hard-code API keys in source.
# (The original code embedded a literal Gemini key AND passed it as the
# environment-variable *name* to os.getenv, which leaks the secret and
# always returns None. That key should be considered compromised and rotated.)
HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face access token
gemini_api_key = os.getenv("GEMINI_API_KEY")  # Google Gemini API key

# --- FastAPI application -------------------------------------------------
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API Server using both Hugging Face and Gemini models",
)

# --- Models --------------------------------------------------------------
# LLaMA via Hugging Face (requires a valid HF_TOKEN with model access).
llama_model = HuggingFaceChat(model="meta-llama/LLaMA-3-2", token=HF_TOKEN)

# Gemini (adjust model name to the actual integration's supported models).
gemini_model = ChatGemini(api_key=gemini_api_key, model="gemini_model_name_here")

# --- Prompt --------------------------------------------------------------
# Simple QnA prompt template; {question} is filled in at request time.
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")
def get_model_response(question: str, use_gemini: bool = False):
    """Route a question to the selected chat model and return its response.

    Args:
        question: The user's question text.
        use_gemini: If True, query the Gemini model; otherwise query the
            Hugging Face LLaMA model (the default).

    Returns:
        Whatever the underlying model callable returns for ``question``.
        TODO(review): consider richer selection criteria than a single flag.
    """
    # Dispatch on the caller's model preference; both models are module-level
    # singletons initialized at import time.
    if use_gemini:
        return gemini_model(question)
    return llama_model(question)
# Register the QnA route — without the decorator the function was never
# exposed by the app and the endpoint was unreachable dead code.
@app.post("/qna")
async def qna_endpoint(question: str, use_gemini: bool = False):
    """Receive a question and return a response from the selected model.

    Args:
        question: The question to answer.
        use_gemini: If True use the Gemini model, otherwise the Hugging Face
            LLaMA model (default).

    Returns:
        JSON object of the form ``{"response": <model output>}``.
    """
    response = get_model_response(question, use_gemini)
    return {"response": response}
# Run the application when executed as a script (nest_asyncio.apply() above
# makes this safe inside notebook event loops as well).
if __name__ == "__main__":
    try:
        # Bind to all IPv4 interfaces on port 8000.
        uvicorn.run(app, host="0.0.0.0", port=8000)
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        # Top-level boundary: report any startup/runtime failure instead of
        # dying with a raw traceback.
        print(f"An error occurred: {e}")