File size: 4,494 Bytes
c625a8c ed1e95d c625a8c 1dfd50d 654eaa0 30b9c64 654eaa0 f88f764 b94326e b2c95c6 6a34b4c d1f386f 609ebbf aad9e06 b9c177c cefc820 436d8e0 2e8b1ec cefc820 609ebbf 4cb18b7 608c192 df24c3c 609ebbf 5f76c8c 609ebbf cc15d4e 609ebbf 4cb18b7 e06db31 df24c3c 654eaa0 609ebbf ef7bf1f 682ac66 213eaca 654eaa0 c625a8c 40d7f6a 1ede826 ef7bf1f 1ede826 1182d2f c625a8c 051e53e c625a8c f88f764 c625a8c 051e53e 43f6d46 941cbbb c625a8c 609ebbf a010ff1 3c58d3e ef6577b c625a8c 0d38122 886ba9c c625a8c 8766e00 886ba9c 1dfd50d 5b0eb6a c625a8c 1322444 051e53e 3523ac0 aad9e06 96cc7ba aad9e06 1322444 d861c90 1322444 08499cc 886ba9c 1322444 96cc7ba ef7bf1f 1322444 ef7bf1f 1322444 a161c80 ef7bf1f 1322444 aad9e06 3cc3cf4 1322444 7e33769 1322444 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import fastapi
from fastapi.responses import JSONResponse
from fastapi_users import schemas
from time import time
#from fastapi.middleware.cors import CORSMiddleware
#MODEL_PATH = "./qwen1_5-0_5b-chat-q4_0.gguf" #"./qwen1_5-0_5b-chat-q4_0.gguf"
import logging
import llama_cpp
import llama_cpp.llama_tokenizer
from pydantic import BaseModel
from fastapi import APIRouter
from app.users import current_active_user
class GenModel(BaseModel):
    """Request body for the single-turn /generate endpoint.

    Only ``question`` is required; the remaining fields carry sampling
    defaults. Note that the /generate handler currently overrides
    ``system``, ``temperature`` and ``seed`` server-side.
    """

    # The user's free-text question/prompt.
    question: str
    # System prompt steering the model toward medical triage behavior.
    # Typos fixed ("atat" -> "attain", missing spaces, "response" -> "respond").
    system: str = (
        "You are a helpful medical AI chat assistant. Help as much as you can. "
        "Also continuously ask for possible symptoms in order to attain a "
        "conclusive ailment or sickness and possible solutions. "
        "Remember, respond in English."
    )
    # Sampling controls passed through to llama.cpp.
    temperature: float = 0.8
    seed: int = 101
    # Mirostat adaptive sampling parameters (mode 2 = Mirostat v2).
    mirostat_mode: int = 2
    mirostat_tau: float = 4.0
    mirostat_eta: float = 1.1
class ChatModel(BaseModel):
    """Request body for the multi-turn /chat/ endpoint.

    ``question`` holds the full conversation as a list of OpenAI-style
    message dicts; the remaining fields are sampling defaults.
    """

    # Conversation history: list of {"role": ..., "content": ...} dicts.
    question: list
    # Default system prompt for general-purpose chat.
    system: str = "You are a helpful AI assistant. You are chatting with a human. Help as much as you can."
    #Also continuously ask for possible symptoms in order to atat a conclusive ailment or sickness and possible solutions.Remember, response in English."
    # Sampling controls passed through to llama.cpp.
    temperature: float = 0.8
    seed: int = 101
    # Mirostat adaptive sampling parameters (mode 2 = Mirostat v2).
    mirostat_mode: int = 2
    mirostat_tau: float = 4.0
    mirostat_eta: float = 1.1
# Model instance backing the /chat/ endpoint: quantized (q2_k) healthcare
# model downloaded from the Hugging Face Hub, paired with the repo's HF
# tokenizer. Loaded once at import time (network access required).
llm_chat = llama_cpp.Llama.from_pretrained(
    repo_id="moriire/healthcare-ai-q2_k",
    filename="*.gguf",  # glob: picks the matching .gguf file in the repo
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("moriire/healthcare-ai-q2_k"),
    verbose=False,
    n_ctx=256,  # NOTE(review): very small context window for multi-turn chat — confirm intentional (generate model uses 4096)
    n_gpu_layers=0,  # CPU-only inference
    #chat_format="llama-2"
)
# Model instance backing the /generate endpoint: same HF repo as llm_chat
# but with a larger context window and Mirostat v2 sampling enabled.
# Loaded once at import time (network access required).
llm_generate = llama_cpp.Llama.from_pretrained(
    repo_id="moriire/healthcare-ai-q2_k",
    filename="*.gguf",  # glob: picks the matching .gguf file in the repo
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("moriire/healthcare-ai-q2_k"),
    verbose=False,
    n_ctx=4096,
    n_gpu_layers=0,  # CPU-only inference
    # Mirostat v2 adaptive sampling defaults.
    mirostat_mode=2,
    mirostat_tau=4.0,
    mirostat_eta=1.1,
    #chat_format="llama-2"
)
# Logger setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# The standalone FastAPI app (and its CORS middleware below) is disabled;
# presumably llm_router is mounted by the main application instead — the
# main app would then own CORS configuration. TODO confirm against caller.
#app = fastapi.FastAPI(
#title="OpenGenAI",
#description="Your Excellect AI Physician")
"""
app.add_middleware(
CORSMiddleware,
allow_origins = ["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"]
)
"""
# Router exposing all LLM endpoints under the /llm prefix.
llm_router = APIRouter(prefix="/llm")


@llm_router.get("/health", tags=["llm"])
def health():
    """Liveness probe: report that the service is up."""
    return dict(status="ok")
# Chat Completion API
@llm_router.post("/chat/", tags=["llm"])
async def chat(chatm: ChatModel):  # , user: schemas.BaseUser = fastapi.Depends(current_active_user)):
    """Run a multi-turn chat completion against llm_chat.

    ``chatm.question`` is passed straight through as the ``messages``
    argument, so it is expected to be a list of OpenAI-style message
    dicts ({"role": ..., "content": ...}) — TODO confirm with clients.

    Returns the raw llama.cpp completion dict with an added ``"time"``
    key (wall-clock seconds spent in the completion), or a JSON 500
    response on any failure.
    """
    try:
        start = time()
        output = llm_chat.create_chat_completion(
            messages=chatm.question,
            temperature=chatm.temperature,
            seed=chatm.seed,
            #stream=True
        )
        # Attach elapsed wall-clock time (seconds) for client-side metrics.
        output["time"] = time() - start
        return output
    except Exception as e:
        # Boundary handler: log the failure (fixed: message previously said
        # "/complete" for this /chat endpoint) and return a generic 500.
        logger.error(f"Error in /chat endpoint: {e}")
        return JSONResponse(
            status_code=500, content={"message": "Internal Server Error"}
        )
# Single-turn Generation API
@llm_router.post("/generate", tags=["llm"])
async def generate(gen: GenModel):  # , user: schemas.BaseUser = fastapi.Depends(current_active_user)):
    """Run a single-turn completion (system prompt + one user message)
    against llm_generate.

    Returns the raw llama.cpp completion dict with an added ``"time"``
    key (wall-clock seconds spent in the completion), or a JSON 500
    response on any failure.
    """
    # NOTE(review): the client-supplied system prompt, temperature and seed
    # are unconditionally overwritten here, so those GenModel fields are
    # effectively ignored by this endpoint — confirm this is intentional.
    gen.system = "You are an helpful medical AI assistant."
    gen.temperature = 0.5
    gen.seed = 42
    try:
        start = time()
        output = llm_generate.create_chat_completion(
            messages=[
                {"role": "system", "content": gen.system},
                {"role": "user", "content": gen.question},
            ],
            temperature=gen.temperature,
            seed=gen.seed,
            #stream=True,
        )
        # Attach elapsed wall-clock time (seconds) for client-side metrics.
        output["time"] = time() - start
        return output
    except Exception as e:
        # Boundary handler: log and return a generic 500 to the client.
        logger.error(f"Error in /generate endpoint: {e}")
        return JSONResponse(
            status_code=500, content={"message": "Internal Server Error"}
        )
|