HealthAssistant

Running

App Files Files

reedmayhew committed on Feb 2

Commit

25ef0fa

verified ·

1 Parent(s): b7ef472

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -42

app.py CHANGED Viewed

@@ -40,7 +40,7 @@ h1 {
 }
 """
-# Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("reedmayhew/HealthCare-Reasoning-Assistant-Llama-3.1-8B-HF", device_map="cuda")
 model = AutoModelForCausalLM.from_pretrained("reedmayhew/HealthCare-Reasoning-Assistant-Llama-3.1-8B-HF", device_map="cuda")
@@ -56,22 +56,23 @@ def chat_llama3_8b(message: str,
                    max_new_tokens: int,
                    confirm: bool) -> str:
     """
-    Generate a streaming response using the llama3-8b model.
     Args:
         message (str): The input message.
         history (list): The conversation history.
         temperature (float): The temperature for generating the response.
         max_new_tokens (int): The maximum number of new tokens to generate.
-        confirm (bool): Whether the user has confirmed the age/disclaimer.
-    Returns:
-        str: The generated response.
     """
-    # If the confirmation checkbox is not checked, return a short message immediately.
     if not confirm:
         return "⚠️ You must confirm that you meet the usage requirements before sending a message."
     conversation = []
     for user, assistant in history:
         conversation.extend([
@@ -79,14 +80,15 @@ def chat_llama3_8b(message: str,
             {"role": "assistant", "content": assistant}
         ])
-    # Ensure the model starts with "<think>"
     conversation.append({"role": "user", "content": message})
-    conversation.append({"role": "assistant", "content": "<think> "})  # Force <think> at start
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         input_ids=input_ids,
         streamer=streamer,
@@ -99,47 +101,25 @@ def chat_llama3_8b(message: str,
     if temperature == 0:
         generate_kwargs['do_sample'] = False
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
-    outputs = []
-    buffer = ""
-    think_detected = False
-    thinking_message_sent = False
-    full_response = ""  # Store the full assistant response
     for text in streamer:
-        buffer += text
-        full_response += text  # Store raw assistant response (includes <think>)
-        # Send the "thinking" message once text starts generating
-        if not thinking_message_sent:
-            thinking_message_sent = True
-            yield "A.I. Healthcare is Thinking... Please wait...\n\n"
-        # Wait until </think> is detected before streaming output
-        if not think_detected:
-            print(buffer)
-            if "</think>" in buffer:
-                think_detected = True
-                buffer = buffer.split("</think>", 1)[1]  # Remove <think> section
-        else:
-            outputs.append(text)
-            yield "".join(outputs)
-    # Store the full response (including <think>) in history for context
     history.append((message, full_response))
 # Custom JavaScript to disable the send button until confirmation is given.
-# (The JS waits for the checkbox with a label containing the specified text and then monitors its state.)
 CUSTOM_JS = """
 <script>
 document.addEventListener("DOMContentLoaded", function() {
-    // Poll for the confirmation checkbox and the send button inside the ChatInterface.
     const interval = setInterval(() => {
-        // The checkbox is rendered as an <input type="checkbox"> with an associated label.
         const checkbox = document.querySelector('input[type="checkbox"][aria-label*="I hereby confirm that I am at least 18 years of age"]');
-        // The send button might be a <button> element with a title or specific text. Adjust the selector as needed.
         const sendButton = document.querySelector('button[title="Send"]');
         if (checkbox && sendButton) {
             sendButton.disabled = !checkbox.checked;
@@ -155,10 +135,8 @@ document.addEventListener("DOMContentLoaded", function() {
 with gr.Blocks(css=css, title="A.I. Healthcare") as demo:
     gr.Markdown(DESCRIPTION)
-    # Inject the custom JavaScript.
     gr.HTML(CUSTOM_JS)
-    # The ChatInterface below now includes additional inputs: the confirmation checkbox and the parameter sliders.
     chat_interface = gr.ChatInterface(
         fn=chat_llama3_8b,
         title="A.I. Healthcare Chat",
@@ -173,7 +151,7 @@ with gr.Blocks(css=css, title="A.I. Healthcare") as demo:
                 elem_id="age_confirm_checkbox"
             ),
             gr.Slider(minimum=0.6, maximum=0.6, step=0.1, value=0.6, label="Temperature", visible=False),
-            gr.Slider(minimum=1024, maximum=4096, step=128, value=2048, label="Max new tokens", visible=False),
         ],
         examples=[
             ['What are the common symptoms of diabetes?'],
@@ -183,6 +161,7 @@ with gr.Blocks(css=css, title="A.I. Healthcare") as demo:
             ['What should I know about the side effects of common medications?']
         ],
         cache_examples=False,
     )
     gr.Markdown(LICENSE)

 }
 """
+# Load the tokenizer and model with the updated model name
 tokenizer = AutoTokenizer.from_pretrained("reedmayhew/HealthCare-Reasoning-Assistant-Llama-3.1-8B-HF", device_map="cuda")
 model = AutoModelForCausalLM.from_pretrained("reedmayhew/HealthCare-Reasoning-Assistant-Llama-3.1-8B-HF", device_map="cuda")
                    max_new_tokens: int,
                    confirm: bool) -> str:
     """
+    Generate a streaming response using the HealthCare-Reasoning-Assistant-Llama-3.1-8B-HF model.
     Args:
         message (str): The input message.
         history (list): The conversation history.
         temperature (float): The temperature for generating the response.
         max_new_tokens (int): The maximum number of new tokens to generate.
+        confirm (bool): Whether the user has confirmed the usage disclaimer.
+    Yields:
+        str: The generated response, streamed token-by-token.
     """
+    # Ensure the user has confirmed the disclaimer
     if not confirm:
         return "⚠️ You must confirm that you meet the usage requirements before sending a message."
+    # Prepare the conversation history for the model input
     conversation = []
     for user, assistant in history:
         conversation.extend([
             {"role": "assistant", "content": assistant}
         ])
+    # Append the current user message
     conversation.append({"role": "user", "content": message})
+    # Convert the conversation into input ids using the chat template
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
+    # Set up the streamer to stream text output
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         input_ids=input_ids,
         streamer=streamer,
     if temperature == 0:
         generate_kwargs['do_sample'] = False
+    # Launch the generation in a separate thread
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
+    full_response = ""
+    # Simply stream each token as it comes from the model
     for text in streamer:
+        full_response += text
+        yield text
+    # Save the full response (for context in the conversation history)
     history.append((message, full_response))
 # Custom JavaScript to disable the send button until confirmation is given.
 CUSTOM_JS = """
 <script>
 document.addEventListener("DOMContentLoaded", function() {
     const interval = setInterval(() => {
         const checkbox = document.querySelector('input[type="checkbox"][aria-label*="I hereby confirm that I am at least 18 years of age"]');
         const sendButton = document.querySelector('button[title="Send"]');
         if (checkbox && sendButton) {
             sendButton.disabled = !checkbox.checked;
 with gr.Blocks(css=css, title="A.I. Healthcare") as demo:
     gr.Markdown(DESCRIPTION)
     gr.HTML(CUSTOM_JS)
     chat_interface = gr.ChatInterface(
         fn=chat_llama3_8b,
         title="A.I. Healthcare Chat",
                 elem_id="age_confirm_checkbox"
             ),
             gr.Slider(minimum=0.6, maximum=0.6, step=0.1, value=0.6, label="Temperature", visible=False),
+            gr.Slider(minimum=128, maximum=4096, step=64, value=1024, label="Max new tokens", visible=False),
         ],
         examples=[
             ['What are the common symptoms of diabetes?'],
             ['What should I know about the side effects of common medications?']
         ],
         cache_examples=False,
+        allow_screenshot=False,
     )
     gr.Markdown(LICENSE)