import os
from fastapi import FastAPI
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI  # Official Gemini chat integration
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint  # Hugging Face chat integration
from langserve import add_routes
import uvicorn
import nest_asyncio

# Allows the use of uvicorn.run in environments like Jupyter
nest_asyncio.apply()

# Retrieve credentials from the environment; never hard-code API keys in source
HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face access token
gemini_api_key = os.getenv("GOOGLE_API_KEY")  # Gemini API key
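
# A quick sanity check (illustrative, not required): fail fast if credentials are missing
if not HF_TOKEN or not gemini_api_key:
    raise RuntimeError("Set HF_TOKEN and GOOGLE_API_KEY in the environment before starting.")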

# Initialize the FastAPI app
app = FastAPI(
    title="Multimodal Language Server",
    version="1.0",
    description="A simple QnA API Server using both Hugging Face and Gemini models"
)

# Initialize the LLaMA chat model via the Hugging Face Inference API
# (the repo id assumes a Llama 3.2 instruct checkpoint; adjust to one your token can access)
llama_model = ChatHuggingFace(
    llm=HuggingFaceEndpoint(
        repo_id="meta-llama/Llama-3.2-3B-Instruct",
        huggingfacehub_api_token=HF_TOKEN,
    )
)

# Initialize the Gemini chat model (the model name here is illustrative)
gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=gemini_api_key)

# Define a QnA prompt using a template
qna_prompt = ChatPromptTemplate.from_template("Answer the question: {question}")

# Choose a model based on the caller's preference and run the prompt through it.
# This could be extended with more refined selection criteria (cost, latency, etc.).
def get_model_response(question: str, use_gemini: bool = False) -> str:
    model = gemini_model if use_gemini else llama_model
    chain = qna_prompt | model  # Compose the prompt and model into a runnable chain
    return chain.invoke({"question": question}).content

# Create an API endpoint
@app.post("/llm_api")
async def qna_endpoint(question: str, use_gemini: bool = False):
    """
    Endpoint to receive a question and return a response from either the Hugging Face or Gemini model.
    """
    response = get_model_response(question, use_gemini)
    return {"response": response}
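
# LangServe can also expose a chain directly; a minimal sketch, where the
# "/gemini" path and the prompt-to-model chain are illustrative choices:
add_routes(app, qna_prompt | gemini_model, path="/gemini")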

# Run the application
if __name__ == "__main__":
    try:
        uvicorn.run(app, host="0.0.0.0", port=8000)  # Listen on all IPv4 interfaces
    except KeyboardInterrupt:
        print("Server stopped manually.")
    except Exception as e:
        print(f"An error occurred: {e}")