Mattral committed
Commit 36eb6b1 · verified · 1 Parent(s): ff96349

Update app.py

Files changed (1): app.py (+43 -24)
app.py CHANGED
@@ -1,18 +1,32 @@
+
 import gradio as gr
 from typing import Iterator, List, Tuple
-
-# Mock model function to simulate response generation
-def mock_run(
-    message: str,
-    chat_history: List[Tuple[str, str]],
-    system_prompt: str,
-    max_new_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-) -> Iterator[str]:
-    response = f"Mock response to: {message}"
-    yield response
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftConfig, PeftModel
+
+base_model = "mistralai/Mistral-7B-Instruct-v0.2"
+adapter = "GRMenon/mental-health-mistral-7b-instructv0.2-finetuned-V2"
+
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained(
+    base_model,
+    add_bos_token=True,
+    trust_remote_code=True,
+    padding_side='left'
+)
+
+# Create PEFT model using base_model and the fine-tuned adapter
+config = PeftConfig.from_pretrained(adapter)
+model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
+                                             load_in_4bit=True,
+                                             device_map='auto',
+                                             torch_dtype='auto')
+model = PeftModel.from_pretrained(model, adapter)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+model.eval()
 
 DEFAULT_SYSTEM_PROMPT = "You are Phoenix AI Healthcare. You are professional, you are polite, give only truthful information and are based on the Mistral-7B model from Mistral AI about Healthcare and Wellness. You can communicate in different languages equally well."
 
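A note on the model-loading hunk above: `load_in_4bit=True` only works when the bitsandbytes package is installed (and accelerate, for `device_map='auto'`), and newer transformers releases expect quantization options to be wrapped in a `BitsAndBytesConfig`. A 4-bit model dispatched with `device_map='auto'` is also already placed on the GPU, so the later `model.to(device)` is redundant and can raise an error on recent versions. A minimal sketch of an equivalent load under those assumptions:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_model = "mistralai/Mistral-7B-Instruct-v0.2"
adapter = "GRMenon/mental-health-mistral-7b-instructv0.2-finetuned-V2"

# 4-bit quantization config; assumes bitsandbytes and accelerate are installed.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumption: bf16 compute on a modern GPU
)

tokenizer = AutoTokenizer.from_pretrained(base_model, add_bos_token=True, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",  # accelerate places the weights; no model.to(device) needed afterwards
)
model = PeftModel.from_pretrained(model, adapter)
model.eval()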
@@ -22,7 +36,7 @@ MAX_INPUT_TOKEN_LENGTH = 4000
 
 DESCRIPTION = """
 # Simple Healthcare Chatbot
-### Powered by a mock model
+### Powered by Mistral-7B with Healthcare Fine-Tuning
 """
 
 def clear_and_save_textbox(message: str) -> tuple[str, str]:
@@ -52,21 +66,26 @@ def generate(
         raise ValueError("Max new tokens exceeded")
 
     history = history_with_input[:-1]
-    generator = mock_run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
-    try:
-        first_response = next(generator)
-        yield history + [(message, first_response)]
-    except StopIteration:
-        yield history + [(message, "")]
-    for response in generator:
-        yield history + [(message, response)]
+    conversation = [{"role": "system", "content": system_prompt}] + \
+                   [{"role": "user", "content": user_input} for user_input, _ in history] + \
+                   [{"role": "user", "content": message}]
+    input_ids = tokenizer.apply_chat_template(conversation=conversation,
+                                              tokenize=True,
+                                              add_generation_prompt=True,
+                                              return_tensors='pt').to(device)
+    output_ids = model.generate(input_ids=input_ids, max_new_tokens=max_new_tokens,
+                                do_sample=True, pad_token_id=tokenizer.pad_token_id)
+    response = tokenizer.batch_decode(output_ids.detach().cpu().numpy(), skip_special_tokens=True)
+    response_text = response[0]
+
+    yield history + [(message, response_text)]
 
 def check_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None:
-    input_token_length = len(message) + sum(len(msg) for msg, _ in chat_history)
+    input_token_length = len(tokenizer.encode(message)) + sum(len(tokenizer.encode(msg)) for msg, _ in chat_history)
     if input_token_length > MAX_INPUT_TOKEN_LENGTH:
         raise gr.Error(f"The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.")
 
-with gr.Blocks() as demo:
+with gr.Blocks(css="./styles/style.css") as demo:  # Link to CSS file
     gr.Markdown(DESCRIPTION)
     gr.Button("Duplicate Space for private use", elem_id="duplicate-button")
 
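Two review notes on the rewritten generate(): the temperature, top_p, and top_k parameters are still accepted but never forwarded to model.generate (with do_sample=True the model's default generation config applies), and the function now yields a single final response, so the Gradio UI no longer streams tokens. A sketch of a streaming variant built on transformers' TextIteratorStreamer, assuming the tokenizer, model, and device globals from this diff; stream_generate is a hypothetical name, not part of the commit:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(message, history, system_prompt, max_new_tokens,
                    temperature, top_p, top_k):
    # Caution: Mistral-Instruct chat templates may reject a "system" role and
    # expect strictly alternating user/assistant turns; as in the diff, only
    # the user side of history is kept here, which may need adjusting.
    conversation = ([{"role": "system", "content": system_prompt}]
                    + [{"role": "user", "content": u} for u, _ in history]
                    + [{"role": "user", "content": message}])
    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens,
        do_sample=True, temperature=temperature, top_p=top_p, top_k=top_k,
        pad_token_id=tokenizer.pad_token_id,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()  # generate off-thread

    partial = ""
    for new_text in streamer:  # yields decoded text chunks as tokens arrive
        partial += new_text
        yield history + [(message, partial)]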
 
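Finally, the updated check_input_token_length counts real tokens per message, but it still ignores the system_prompt argument it receives and the chat-template overhead added at generation time, so it undercounts the prompt the model actually sees. A sketch of a stricter check, assuming the same module-level tokenizer and MAX_INPUT_TOKEN_LENGTH:

def check_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None:
    # Measure the exact templated prompt, system prompt included.
    conversation = ([{"role": "system", "content": system_prompt}]
                    + [{"role": "user", "content": u} for u, _ in chat_history]
                    + [{"role": "user", "content": message}])
    input_token_length = len(
        tokenizer.apply_chat_template(conversation, add_generation_prompt=True)
    )
    if input_token_length > MAX_INPUT_TOKEN_LENGTH:
        raise gr.Error(
            f"The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). "
            "Clear your chat history and try again."
        )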