import os

from fastapi import FastAPI
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI  # Gemini integration (langchain-google-genai)
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint  # Hugging Face integration (langchain-huggingface)
import uvicorn
import nest_asyncio

# Allows uvicorn.run to be called in environments that already run an event loop (e.g. Jupyter)
nest_asyncio.apply()

# Read credentials from environment variables -- never hard-code API keys in source
hf_token = os.getenv("HF_TOKEN")              # Hugging Face access token
gemini_api_key = os.getenv("GEMINI_API_KEY")  # Gemini API key

# Initialize the FastAPI app
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API server using both Hugging Face and Gemini models",
)

# Initialize the Llama model via the Hugging Face Inference API
# (adjust repo_id to the Llama 3.2 variant your token has access to)
llama_endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    task="text-generation",
    huggingfacehub_api_token=hf_token,
)
llama_model = ChatHuggingFace(llm=llama_endpoint)

# Initialize the Gemini chat model (specify whichever Gemini model you have access to)
gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=gemini_api_key)

# Define a QnA prompt using a template
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")


# Choose a model based on the caller's preference.
# This could be extended with more refined selection criteria (cost, latency, capability, ...).
def get_model_response(question: str, use_gemini: bool = False) -> str:
    model = gemini_model if use_gemini else llama_model
    chain = qna_prompt | model                     # Pipe the prompt into the selected chat model
    result = chain.invoke({"question": question})
    return result.content                          # Both chat models return an AIMessage; return its text


# Create an API endpoint
@app.post("/llm_api")
async def qna_endpoint(question: str, use_gemini: bool = False):
    """
    Receive a question and return a response from either the
    Hugging Face (Llama) or Gemini model.
    """
    response = get_model_response(question, use_gemini)
    return {"response": response}


# Run the application
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # Bind to IPv4 on all interfaces
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        print(f"An error occurred: {e}")
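
# A minimal client sketch for trying the endpoint once the server is running.
# This assumes the server is reachable at http://localhost:8000 and that the
# `requests` package is installed; the question text is only an example.
# Because `question` and `use_gemini` are plain-typed parameters, FastAPI
# reads them from the query string, so they are passed via `params`.
#
# import requests
#
# resp = requests.post(
#     "http://localhost:8000/llm_api",
#     params={"question": "What is LangChain?", "use_gemini": True},
# )
# print(resp.json()["response"])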