llm-fastapi-1

Runtime error

App Files Files Community

llm-fastapi-1 / main.py

huedaya

Update main.py

27abc54 verified 10 months ago

raw

history blame contribute delete

2.11 kB

	import asyncio
	import os
	import secrets
	from typing import Annotated

	from fastapi import Depends, FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.security import OAuth2PasswordBearer
	from huggingface_hub import InferenceClient
	from pydantic import BaseModel

	# Bearer-token dependency consumed by the protected /generate endpoint.
	# NOTE(review): no "token" route is defined in the visible code; the URL is
	# only what this scheme is configured with.
	oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

	app = FastAPI()

	# CORS is fully open: any origin, method and header, with credentials allowed.
	# NOTE(review): wildcard origins combined with allow_credentials=True is very
	# permissive; confirm this is intended before exposing the service publicly.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_methods=["*"],
	    allow_headers=["*"],
	    allow_credentials=True,
	)

	# Inference client bound to a single model id. Other ids that were left
	# commented out next to it:
	#   mistralai/Mistral-7B-Instruct-v0.2
	#   mistralai/Mistral-Nemo-Instruct-2407
	client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")

	# Request body for POST /generate.
	# (Documented with comments rather than a docstring so the generated schema
	# stays exactly as it was.)
	class Item(BaseModel):
	    # Required text inputs. `history` is forwarded to format_prompt, which
	    # currently ignores it.
	    prompt: str
	    history: list
	    system_prompt: str

	    # Optional generation settings. generate() raises temperatures below
	    # 0.01 to 0.01, so the 0.0 default is never sent as-is.
	    temperature: float = 0.0
	    max_new_tokens: int = 2048
	    top_p: float = 0.15
	    repetition_penalty: float = 1.0

	def format_prompt(message, history):
	    """Return ``message`` unchanged.

	    ``history`` is accepted for call-site compatibility but is not used.
	    """
	    prompt_text = message
	    return prompt_text

	def generate(item: Item):
	    """Run one text-generation request for ``item`` and return the text.

	    The prompt is ``"{system_prompt}, {prompt}"`` passed through
	    ``format_prompt`` together with ``item.history``. Sampling is enabled
	    with a fixed seed of 42, and the temperature is raised to at least 0.01.

	    Args:
	        item: Request payload with the prompt texts and sampling settings.

	    Returns:
	        The concatenated text of the streamed tokens. Tokens flagged as
	        special (such as an end-of-sequence marker) are left out so they do
	        not leak into the response.
	    """
	    # Never send a temperature below 0.01 (the Item default is 0.0).
	    # NOTE(review): presumably the backend rejects non-positive temperatures
	    # when sampling; confirm.
	    temperature = max(float(item.temperature), 1e-2)

	    generate_kwargs = {
	        "temperature": temperature,
	        "max_new_tokens": item.max_new_tokens,
	        "top_p": float(item.top_p),
	        "repetition_penalty": item.repetition_penalty,
	        "do_sample": True,
	        "seed": 42,
	    }

	    formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
	    stream = client.text_generation(
	        formatted_prompt,
	        **generate_kwargs,
	        stream=True,
	        details=True,
	        return_full_text=False,
	    )

	    # Special tokens are control markers, not content; skip them. getattr
	    # keeps this working if a token object lacks the `special` flag.
	    return "".join(
	        chunk.token.text
	        for chunk in stream
	        if not getattr(chunk.token, "special", False)
	    )

	# Liveness probe for GET /: always answers with a fixed "ok" payload.
	@app.get("/")
	async def root():
	    return dict(status="ok")

	# Liveness probe for HEAD /: same fixed payload as the GET handler.
	# NOTE(review): this reuses the function name `root` from the GET handler, so
	# the module-level name is rebound here; consider giving it a distinct name.
	@app.head("/")
	async def root():
	    return dict(status="ok")


	@app.post("/generate")
	async def generate_text(item: Item, token: Annotated[str, Depends(oauth2_scheme)]):
	    """Generate text for ``item`` after checking the caller's bearer token.

	    The bearer token supplied through ``oauth2_scheme`` must equal the
	    ``API_KEY`` environment variable.

	    Args:
	        item: Prompt texts and sampling settings for the request.
	        token: Bearer token extracted from the request.

	    Returns:
	        A dict of the form ``{"response": <generated text>}``.

	    Raises:
	        HTTPException: 403 when ``API_KEY`` is unset or empty, or when the
	            token does not match it.
	    """
	    expected_key = os.environ.get("API_KEY")

	    # Fail closed when no key is configured, and compare in constant time so
	    # response timing does not reveal how much of a guessed key matched.
	    # Encoding to bytes lets compare_digest accept non-ASCII input.
	    authorized = bool(expected_key) and secrets.compare_digest(
	        token.encode("utf-8"), expected_key.encode("utf-8")
	    )
	    if not authorized:
	        raise HTTPException(status_code=403, detail="Invalid API key")

	    # generate() does blocking network I/O; run it in a worker thread so the
	    # event loop can keep serving other requests in the meantime.
	    generated = await asyncio.to_thread(generate, item)
	    return {"response": generated}