Update app.py
app.py CHANGED
@@ -13,9 +13,9 @@ class GenModel(BaseModel):
     question: str
     system: str = "You are a helpful medical AI assistant. Help as much as you can. Remember, response in English."
     temperature: float = 0.8
-    seed: int = 101
-    mirostat_mode: int=2
-    mirostat_tau: float=4.0
+    seed: int = 101
+    mirostat_mode: int=2
+    mirostat_tau: float=4.0
     mirostat_eta: float=1.1
 
 llm_chat = llama_cpp.Llama.from_pretrained(
@@ -77,12 +77,11 @@ async def chat(gen:GenModel):
             seed=gen.seed,
             #stream=True
         )
-        messages.append({"role": "user", "content": gen.question}
+        messages.append({"role": "user", "content": gen.question})
         print(output)
-
         et = time()
         output["time"] = et - st
-        messages.append({'role': "assistant", "content": output['choices'][0]['message']})
+        messages.append({'role': "assistant", "content": output['choices'][0]['message']['content']})
         print(messages)
         return output
     except Exception as e:
@@ -94,9 +93,9 @@ async def chat(gen:GenModel):
 # Chat Completion API
 @app.post("/generate")
 async def generate(gen:GenModel):
-    gen.system = "You are an AI assistant."
+    gen.system = "You are an helpful medical AI assistant."
     gen.temperature = 0.5
-    gen.seed
+    gen.seed = 42
     try:
         st = time()
         output = llm_generate.create_chat_completion(
@@ -119,7 +118,7 @@ async def generate(gen:GenModel):
         #print(chunk)
         """
         et = time()
-
+        output["time"] = et - st
         return output
     except Exception as e:
         logger.error(f"Error in /generate endpoint: {e}")
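For context on the ['message']['content'] fix in /chat: llama-cpp-python's create_chat_completion returns an OpenAI-style completion dict, so the assistant reply sits under choices[0]["message"]. A minimal sketch of that shape, assuming a placeholder model (repo_id and filename below are illustrative, not from this commit):

import llama_cpp

# Placeholder model handle; any chat-capable GGUF behaves the same way.
llm = llama_cpp.Llama.from_pretrained(
    repo_id="TheBloke/some-model-GGUF",  # illustrative, not the app's model
    filename="*Q4_K_M.gguf",
)

output = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    temperature=0.8,
    seed=101,
)

# output["choices"][0]["message"] is a dict: {"role": "assistant", "content": "..."}.
# The old code appended that whole dict as the "content" value, nesting a dict
# where a string belongs; taking its "content" keeps the chat history uniform.
reply = output["choices"][0]["message"]["content"]
history_entry = {"role": "assistant", "content": reply}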
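The re-added seed and mirostat fields on GenModel match llama-cpp-python's sampler keyword arguments, so they can be forwarded straight into create_chat_completion. A hedged sketch of that pass-through (the run_chat helper is illustrative; only the field names and defaults come from the diff):

from pydantic import BaseModel

class GenModel(BaseModel):
    question: str
    system: str = "You are a helpful medical AI assistant. Help as much as you can. Remember, response in English."
    temperature: float = 0.8
    seed: int = 101            # fixed seed -> reproducible sampling
    mirostat_mode: int = 2     # 0 = off, 1 = Mirostat, 2 = Mirostat 2.0
    mirostat_tau: float = 4.0  # target entropy; lower reads as more focused
    mirostat_eta: float = 1.1  # controller learning rate

def run_chat(llm, gen: GenModel):
    # Forward the sampler settings; llama-cpp-python accepts these
    # as keyword arguments on create_chat_completion.
    return llm.create_chat_completion(
        messages=[
            {"role": "system", "content": gen.system},
            {"role": "user", "content": gen.question},
        ],
        temperature=gen.temperature,
        seed=gen.seed,
        mirostat_mode=gen.mirostat_mode,
        mirostat_tau=gen.mirostat_tau,
        mirostat_eta=gen.mirostat_eta,
    )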