from fastapi import FastAPI, Request, Response from pydantic import BaseModel from webscout.LLM import LLM from typing import Union, Dict, List, Any app = FastAPI() class Model(BaseModel): id: str object: str created: int owned_by: str class Message(BaseModel): role: str content: str class CompletionRequest(BaseModel): model: str messages: List[Message] class CompletionResponse(BaseModel): id: str object: str created: int model: str choices: List[Dict[str, Any]] usage: Dict[str, int] models = [ {"id": "meta-llama/Meta-Llama-3-70B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"}, {"id": "google/gemma-2-27b-it", "object": "model", "created": 1686935002, "owned_by": "meta"}, {"id": "google/gemma-2-9b-it", "object": "model", "created": 1686935002, "owned_by": "ConsiousAI"}, {"id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", "object": "model", "created": 1686935002, "owned_by": "cognitivecomputations"}, {"id": "nvidia/Nemotron-4-340B-Instruct", "object": "model", "created": 1686935002, "owned_by": "nvidia"}, {"id": "Qwen/Qwen2-72B-Instruct", "object": "model", "created": 1686935002, "owned_by": "qwen"}, {"id": "microsoft/Phi-3-medium-4k-instruct", "object": "model", "created": 1686935002, "owned_by": "microsoft"}, {"id": "google/gemma-2-9b-it", "object": "model", "created": 1686935002, "owned_by": "ConsiousAI"}, {"id": "openchat/openchat-3.6-8b", "object": "model", "created": 1686935002, "owned_by": "unknown"}, {"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model", "created": 1686935002, "owned_by": "mistral"}, {"id": "meta-llama/Meta-Llama-3-8B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"}, {"id": "mistralai/Mixtral-8x22B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"}, {"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"}, {"id": "Qwen/Qwen2-7B-Instruct", "object": "model", "created": 1686935002, "owned_by": "Qwen"}, {"id": "meta-llama/Meta-Llama-3.1-405B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"} ] @app.post("/v1/chat/completions/") def handle_completions(completion_request: CompletionRequest): system_prompt = next((message.content for message in completion_request.messages if message.role == 'system'), None) user_query = next((message.content for message in completion_request.messages if message.role == 'user'), None) response_text = generative(query=user_query, system_prompt=system_prompt, model=completion_request.model) response = CompletionResponse( id="chatcmpl-1", object="chat.completion", created=1234567890, model=completion_request.model, choices=[{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}], usage={"prompt_tokens": sum(len(message.content.split()) for message in completion_request.messages), "total_tokens": sum(len(message.content.split()) for message in completion_request.messages) + len(response_text.split())} ) return response @app.get("/v1/models/") def get_models(): return {"object": "list", "data": models} @app.post("/v1/completions/") def create_completion(prompt: str, model: str, best_of: int = 1, echo: bool = False, frequency_penalty: float = 0.0): response_text = generative(prompt, "you are an helpful assistant", model) response = { "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7", "object": "text_completion", "created": 1589478378, "model": model, "system_fingerprint": "fp_44709d6fcb", "choices": [{"text": response_text, "index": 0, "logprobs": None, "finish_reason": "length"}] } return response def generative(system_prompt, query, model): llm = LLM(model=model, system_message=system_prompt) messages = [{"role": "user", "content": query}] response = llm.chat(messages) return response if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000)