Update app/main.py
app/main.py  CHANGED  +39 -15
@@ -15,6 +15,7 @@ from typing import Optional
 print("Loading model...")
 SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", mmap=False, mlock=True)
 FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", mmap=False, mlock=True)
+WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", mmap=False, mlock=True)
 # n_gpu_layers=28, # Uncomment to use GPU acceleration
 # seed=1337, # Uncomment to set a specific seed
 # n_ctx=2048, # Uncomment to increase the context window
@@ -23,9 +24,9 @@ FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", mmap=False, mlock
 def extract_restext(response):
     return response['choices'][0]['text'].strip()

-def …
+def ask_llm(llm, question, max_new_tokens=200, temperature=0.5):
     prompt = f"""###User: {question}\n###Assistant:"""
-    result = extract_restext(…
+    result = extract_restext(llm(prompt, max_tokens=max_new_tokens, temperature=temperature, stop=["###User:", "###Assistant:"], echo=False))
     return result

 def check_sentiment(text):
@@ -43,7 +44,8 @@ def check_sentiment(text):
 # TESTING THE MODEL
 print("Testing model...")
 assert "positive" in check_sentiment("ดอกไม้ร้านนี้สวยจัง")
-assert …
+assert ask_llm(FIllm, "Hello!, How are you today?", max_new_tokens=1)  # Just checking that it can run
+assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1)  # Just checking that it can run
 print("Ready.")


@@ -70,12 +72,12 @@ class SA_Result(str, Enum):
     negative = "negative"
     unknown = "unknown"

-class …
+class SAResponse(BaseModel):
     code: int = 200
     text: Optional[str] = None
     result: SA_Result = None

-class …
+class QuestionResponse(BaseModel):
     code: int = 200
     question: Optional[str] = None
     answer: str = None
@@ -89,18 +91,18 @@ def docs():
     return responses.RedirectResponse('./docs')

 @app.post('/classifications/sentiment')
-async def perform_sentiment_analysis(prompt: str = Body(..., embed=True, example="I like eating fried chicken")) -> …
+async def perform_sentiment_analysis(prompt: str = Body(..., embed=True, example="I like eating fried chicken")) -> SAResponse:
     """Performs a sentiment analysis using a finetuned version of Gemma-7b"""
     if prompt:
         try:
             print(f"Checking sentiment for {prompt}")
             result = check_sentiment(prompt)
             print(f"Result: {result}")
-            return …
+            return SAResponse(result=result, text=prompt)
         except Exception as e:
-            return HTTPException(500, …
+            return HTTPException(500, SAResponse(code=500, result=str(e), text=prompt))
     else:
-        return HTTPException(400, …
+        return HTTPException(400, SAResponse(code=400, result="Request argument 'prompt' not provided."))


 @app.post('/questions/finance')
@@ -108,18 +110,40 @@ async def ask_gemmaFinanceTH(
     prompt: str = Body(..., embed=True, example="What's the best way to invest my money"),
     temperature: float = Body(0.5, embed=True),
     max_new_tokens: int = Body(200, embed=True)
-) -> …
+) -> QuestionResponse:
     """
     Ask a finetuned Gemma a finance-related question, just for fun.
     NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
     """
     if prompt:
         try:
-            print(f'Asking …
-            result = …
+            print(f'Asking GemmaFinance with the question "{prompt}"')
+            result = ask_llm(FIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
             print(f"Result: {result}")
-            return …
+            return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
         except Exception as e:
-            return HTTPException(500, …
+            return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
     else:
-        return HTTPException(400, …
+        return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
+
+
+@app.post('/questions/open')
+async def ask_gemmaWild(
+    prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
+    temperature: float = Body(0.5, embed=True),
+    max_new_tokens: int = Body(200, embed=True)
+) -> QuestionResponse:
+    """
+    Ask a finetuned Gemma an open-ended question.
+    NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
+    """
+    if prompt:
+        try:
+            print(f'Asking GemmaWild with the question "{prompt}"')
+            result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+            print(f"Result: {result}")
+            return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
+        except Exception as e:
+            return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
+    else:
+        return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
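
The commit adds a third model (GemmaWild 7b) and folds the per-model prompt helpers into a single ask_llm(llm, question, ...) shared by all endpoints. A minimal in-process sketch of the new helper, assuming the models have loaded as above (the question text is illustrative only):

    # In-process use of the shared helper introduced by this commit.
    # WIllm is the GemmaWild model loaded at startup; the question is illustrative.
    answer = ask_llm(
        WIllm,
        "What should I cook tonight?",
        max_new_tokens=64,   # forwarded to llama-cpp as max_tokens
        temperature=0.7,
    )
    print(answer)  # extract_restext() has already stripped the completion text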
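
Once the Space is serving, the endpoints can be smoke-tested over HTTP. A hypothetical client-side check follows; the base URL is an assumption, not part of the commit, so adjust it to wherever the Space is deployed:

    # Hypothetical smoke test for the updated API; BASE is an assumption.
    import requests

    BASE = "http://localhost:7860"

    r = requests.post(f"{BASE}/classifications/sentiment",
                      json={"prompt": "I like eating fried chicken"})
    print(r.json())  # SAResponse, e.g. {"code": 200, "text": "...", "result": "positive"}

    r = requests.post(f"{BASE}/questions/finance",
                      json={"prompt": "What's the best way to invest my money",
                            "temperature": 0.5, "max_new_tokens": 200})
    print(r.json())  # QuestionResponse carrying the model's answer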
|