Reduce number of models to 2
app/main.py CHANGED (+22 -22)
@@ -15,7 +15,7 @@ from typing import Optional
 print("Loading model...")
 SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", use_mmap=False, use_mlock=True)
 FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", use_mmap=False, use_mlock=True)
-WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", use_mmap=False, use_mlock=True)
+# WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", use_mmap=False, use_mlock=True)
 # n_gpu_layers=28, # Uncomment to use GPU acceleration
 # seed=1337, # Uncomment to set a specific seed
 # n_ctx=2048, # Uncomment to increase the context window
@@ -45,7 +45,7 @@ def check_sentiment(text):
 print("Testing model...")
 assert "positive" in check_sentiment("ดอกไม้ร้านนี้สวยจัง")
 assert ask_llm(FIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
-assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
+# assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
 print("Ready.")
 
 
@@ -127,23 +127,23 @@ async def ask_gemmaFinanceTH(
         return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
 
 
-@app.post('/questions/open-ended')
-async def ask_gemmaWild(
-    prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
-    temperature: float = Body(0.5, embed=True),
-    max_new_tokens: int = Body(200, embed=True)
-) -> QuestionResponse:
-    """
-    Ask a finetuned Gemma an open-ended question..
-    NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
-    """
-    if prompt:
-        try:
-            print(f'Asking GemmaWild with the question "{prompt}"')
-            result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
-            print(f"Result: {result}")
-            return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
-        except Exception as e:
-            return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
-    else:
-        return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
+# @app.post('/questions/open-ended')
+# async def ask_gemmaWild(
+#     prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
+#     temperature: float = Body(0.5, embed=True),
+#     max_new_tokens: int = Body(200, embed=True)
+# ) -> QuestionResponse:
+#     """
+#     Ask a finetuned Gemma an open-ended question..
+#     NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
+#     """
+#     if prompt:
+#         try:
+#             print(f'Asking GemmaWild with the question "{prompt}"')
+#             result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+#             print(f"Result: {result}")
+#             return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
+#         except Exception as e:
+#             return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
+#     else:
+#         return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
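
For context on what this commit leaves behind: the service now eagerly loads only the sentiment model (SAllm) and the finance model (FIllm) at startup, pins them in RAM (use_mlock=True, use_mmap=False), and smoke-tests each before printing "Ready.". Below is a minimal sketch of that pattern with llama-cpp-python; the ask_llm helper shown here is a hypothetical stand-in (its real body is outside this diff), so the create_completion call is an assumption rather than the app's exact code.

from llama_cpp import Llama

# Load the two remaining GGUF models fully into pinned RAM at startup.
SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", use_mmap=False, use_mlock=True)
FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", use_mmap=False, use_mlock=True)

def ask_llm(llm: Llama, prompt: str, max_new_tokens: int = 200, temperature: float = 0.5) -> str:
    # Hypothetical stand-in for the app's ask_llm helper: run one completion and return its text.
    out = llm.create_completion(prompt, max_tokens=max_new_tokens, temperature=temperature)
    return out["choices"][0]["text"]

# Same style of startup smoke test as the app: one generated token proves the model loads and runs.
assert ask_llm(FIllm, "Hello!, How are you today?", max_new_tokens=1)

Commenting out the third Llama(...) call means the ~7B-parameter GemmaWild model is no longer kept resident in memory, which is presumably the motivation for reducing the Space to two models.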