File size: 4,599 Bytes
c625a8c ed1e95d c625a8c 1dfd50d 654eaa0 30b9c64 654eaa0 f88f764 b94326e b2c95c6 6a34b4c d1f386f 609ebbf aad9e06 b9c177c cefc820 71778ca cefc820 609ebbf e0024f6 f8392b7 d9ff7d3 609ebbf 5e9f4b2 609ebbf 71778ca 609ebbf 3ace823 09f4627 654eaa0 609ebbf ef7bf1f 682ac66 213eaca 654eaa0 c625a8c 40d7f6a 1ede826 ef7bf1f 1ede826 1182d2f c625a8c 051e53e c625a8c f88f764 c625a8c 051e53e 43f6d46 941cbbb c625a8c 609ebbf a010ff1 3c58d3e ef6577b c625a8c 0d38122 886ba9c c625a8c 8766e00 886ba9c 1dfd50d 5b0eb6a c625a8c 1322444 051e53e 3523ac0 aad9e06 96cc7ba aad9e06 1322444 72b576d de46666 96cc7ba a389a25 cb2b08b 602e5e4 1322444 c2e7cc6 1322444 1631980 1322444 a161c80 c2e7cc6 72b576d 602e5e4 1322444 7e33769 1322444 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import fastapi
from fastapi.responses import JSONResponse
from fastapi_users import schemas
from time import time
#from fastapi.middleware.cors import CORSMiddleware
#MODEL_PATH = "./qwen1_5-0_5b-chat-q4_0.gguf" #"./qwen1_5-0_5b-chat-q4_0.gguf"
import logging
import llama_cpp
import llama_cpp.llama_tokenizer
from pydantic import BaseModel
from fastapi import APIRouter
from app.users import current_active_user
class GenModel(BaseModel):
    """Request body accepted by the ``/generate`` endpoint.

    Only ``question`` is required; every other field has a default.
    """

    # Prompt text passed to the completion model.
    question: str
    # Default system prompt. The wording was repaired: the previous default
    # contained the typo "atat", had no space after sentence-ending periods
    # and ended with "response in English".
    # NOTE(review): the /generate handler currently overwrites `system`,
    # `temperature` and `seed` with fixed values before using them.
    system: str = (
        "You are a helpful medical AI chat assistant. Help as much as you can. "
        "Also continuously ask for possible symptoms in order to arrive at a "
        "conclusive ailment or sickness and possible solutions. "
        "Remember, respond in English."
    )
    # Sampling temperature and RNG seed for the completion call.
    temperature: float = 0.8
    seed: int = 101
    # Mirostat settings.
    # NOTE(review): no handler visible in this file reads these three fields
    # - confirm whether they are meant to be forwarded to the model call.
    mirostat_mode: int = 2
    mirostat_tau: float = 4.0
    mirostat_eta: float = 1.1
class ChatModel(BaseModel):
    """Request body accepted by the ``/chat/`` endpoint.

    Only ``question`` is required; every other field has a default.
    """

    # Conversation history; the /chat/ handler forwards it unchanged as the
    # `messages` argument of `create_chat_completion`.
    # NOTE(review): presumably a list of {"role": ..., "content": ...} dicts;
    # the element type is not validated here - confirm against callers.
    question: list
    # Default system prompt. The wording was repaired: the previous default
    # was missing a verb ("in order to a conclusive ailment"), had no space
    # after one period and ended with "response in English".
    # NOTE(review): the /chat/ handler does not currently forward `system`
    # to the model - confirm whether it should be prepended to the messages.
    system: str = (
        "You are chatDoctor, a helpful health and medical assistant. "
        "You are chatting with a human. Help as much as you can. "
        "Also continuously ask for possible symptoms in order to arrive at a "
        "conclusive ailment or sickness and possible solutions. "
        "Remember, respond in English."
    )
    # Sampling temperature and RNG seed forwarded to the chat completion.
    temperature: float = 0.8
    seed: int = 101
    # Mirostat settings.
    # NOTE(review): no handler visible in this file reads these three fields
    # - confirm whether they are meant to be forwarded to the model call.
    mirostat_mode: int = 2
    mirostat_tau: float = 4.0
    mirostat_eta: float = 1.1
# Model instance behind the /chat/ endpoint. It is created at import time from
# the quantised Qwen1.5-0.5B-Chat GGUF file and paired with the Hugging Face
# tokenizer of the same model family.
llm_chat = llama_cpp.Llama.from_pretrained(
    # Which model file to fetch.
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q4_0.gguf",
    # Runtime settings.
    n_ctx=512,
    n_gpu_layers=0,
    verbose=False,
    # Tokenizer loaded from the matching (non-GGUF) repository.
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B-Chat"
    ),
    # chat_format="llama-2"  (left disabled)
)
# Model instance behind the /generate endpoint. Same GGUF file as the chat
# model, but with a 4096-token context and without the Hugging Face tokenizer
# override.
llm_generate = llama_cpp.Llama.from_pretrained(
    # Which model file to fetch.
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q4_0.gguf",
    # Runtime settings.
    n_ctx=4096,
    n_gpu_layers=0,
    verbose=False,
    # NOTE(review): these look like per-request sampling options rather than
    # constructor options - confirm against the llama-cpp-python API that
    # passing them here has any effect.
    mirostat_mode=2,
    mirostat_tau=4.0,
    mirostat_eta=1.1,
    # tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")
    # chat_format="llama-2"  (left disabled)
)
# Logging: a logger named after this module, with root logging configured at
# INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Disabled stand-alone application object, kept for reference only.
#app = fastapi.FastAPI(
#title="OpenGenAI",
#description="Your Excellent AI Physician")
# The triple-quoted string below is a bare expression statement: it preserves
# the disabled CORS middleware setup as text and has no effect at runtime.
"""
app.add_middleware(
CORSMiddleware,
allow_origins = ["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"]
)
"""
# Router that groups the LLM endpoints; routes registered on it are created
# with the "/llm" path prefix.
llm_router = APIRouter(prefix="/llm")
@llm_router.get("/health", tags=["llm"])
def health():
    """Health probe: always answers with a fixed ``{"status": "ok"}`` body."""
    status_payload = {"status": "ok"}
    return status_payload
# Chat completion API
@llm_router.post("/chat/", tags=["llm"])
async def chat(chatm: ChatModel):
    """Run a chat completion over the supplied message history.

    Args:
        chatm: Request body. ``question`` is forwarded as the ``messages``
            list; ``temperature`` and ``seed`` are forwarded as sampling
            settings.

    Returns:
        The object returned by ``llm_chat.create_chat_completion`` with an
        extra ``"time"`` key holding the elapsed wall-clock seconds, or a
        500 ``JSONResponse`` if anything raises.
    """
    # NOTE(review): authentication is disabled here; the original signature
    # carried a commented-out dependency:
    #   user: schemas.BaseUser = fastapi.Depends(current_active_user)
    # NOTE(review): chatm.system and the mirostat_* fields are not forwarded
    # to the model call - confirm whether that is intended.
    try:
        started = time()
        output = llm_chat.create_chat_completion(
            messages=chatm.question,
            temperature=chatm.temperature,
            seed=chatm.seed,
        )
        output["time"] = time() - started
        # Logged at DEBUG instead of printed, so completions are not dumped
        # to stdout on every request.
        logger.debug("chat completion output: %s", output)
        return output
    except Exception:
        # logger.exception records the traceback; the message names the
        # route this handler actually serves.
        logger.exception("Error in /chat endpoint")
        return JSONResponse(
            status_code=500, content={"message": "Internal Server Error"}
        )
# Text completion API
@llm_router.post("/generate", tags=["llm"])
async def generate(gen: GenModel):
    """Run a plain text completion for ``gen.question``.

    Args:
        gen: Request body. Only ``question`` influences the result; the
            ``system``, ``temperature`` and ``seed`` fields are overwritten
            with fixed values below.

    Returns:
        The object returned by ``llm_generate.create_completion`` with an
        extra ``"time"`` key holding the elapsed wall-clock seconds, or a
        500 ``JSONResponse`` if anything raises.
    """
    # NOTE(review): authentication is disabled here; the original signature
    # carried a commented-out dependency:
    #   user: schemas.BaseUser = fastapi.Depends(current_active_user)
    # NOTE(review): these assignments discard whatever the client sent for
    # the three fields - confirm this pinning is intended. `system` is not
    # passed to the model call at all.
    gen.system = "You are an helpful medical AI assistant."
    gen.temperature = 0.5
    gen.seed = 42
    try:
        started = time()
        # Non-streaming call so there is a complete result to hand back.
        # Consuming a stream only to print it left the handler without a
        # return value on the success path.
        output = llm_generate.create_completion(
            gen.question,
            temperature=gen.temperature,
            seed=gen.seed,
            echo=True,
        )
        output["time"] = time() - started
        return output
    except Exception:
        # logger.exception records the traceback alongside the message.
        logger.exception("Error in /generate endpoint")
        return JSONResponse(
            status_code=500, content={"message": "Internal Server Error"}
        )
|