Update app.py
app.py
CHANGED
@@ -153,7 +153,7 @@ from typing import List, Dict
 import logging
 import traceback
 
-# Set up logging to help us
+# Set up logging to help us understand what's happening in our application
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
@@ -163,42 +163,40 @@ logger = logging.getLogger(__name__)
 class MedicalAssistant:
     def __init__(self):
         """
-        Initialize
-        This
-        for better memory efficiency while maintaining good performance.
+        Initialize a basic medical assistant for CPU-only environments.
+        This version uses standard model loading without quantization for maximum compatibility.
         """
         try:
-            logger.info("Starting model initialization...")
+            logger.info("Starting basic model initialization...")
 
-            #
+            # Define our model configuration
             self.model_name = "emircanerol/Llama3-Med42-8B-4bit"
             self.max_length = 2048
 
-            #
-            # The pipeline handles tokenizer and model loading automatically
-            logger.info("Initializing pipeline...")
-            self.pipe = pipeline(
-                "text-generation",
-                model=self.model_name,
-                token=os.getenv('HUGGING_FACE_TOKEN'),
-                device_map="auto",
-                torch_dtype=torch.float16, # Use half precision for 4-bit model
-                load_in_4bit=True # Enable 4-bit quantization
-            )
-
-            # Load tokenizer separately for more control over text processing
+            # First load the tokenizer since it's lighter on memory
             logger.info("Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.model_name,
-                token=os.getenv('HUGGING_FACE_TOKEN')
-                trust_remote_code=True
+                token=os.getenv('HUGGING_FACE_TOKEN')
             )
 
-            #
+            # Handle padding token setup
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            # Initialize pipeline with basic CPU settings
+            logger.info("Initializing CPU-based pipeline...")
+            self.pipe = pipeline(
+                "text-generation",
+                model=self.model_name,
+                token=os.getenv('HUGGING_FACE_TOKEN'),
+                device_map="cpu", # Explicitly use CPU
+                torch_dtype=torch.float32, # Use standard precision
+                use_safetensors=True, # Enable safetensors for better memory handling
+                # Removed all quantization settings
+            )
 
-            logger.info("Medical Assistant initialized successfully!")
+            logger.info("Medical Assistant initialized successfully in basic CPU mode!")
 
         except Exception as e:
             logger.error(f"Initialization failed: {str(e)}")
@@ -207,44 +205,47 @@ class MedicalAssistant:
 
     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
         """
-        Generate
-        This method
+        Generate responses using basic CPU-friendly settings.
+        This method focuses on stability over speed, using conservative parameters.
         """
         try:
             logger.info("Preparing message for generation")
 
-            # Create
-            system_prompt = """You are a medical AI assistant
-
-
-            consulting healthcare providers for specific medical advice."""
+            # Create our medical context prompt
+            system_prompt = """You are a medical AI assistant trained on medical knowledge.
+            Provide accurate, professional medical guidance while acknowledging limitations.
+            Always recommend consulting healthcare providers for specific medical advice."""
 
-            # Format
+            # Format our conversation for the model
             messages = [
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": message}
            ]
 
-            # Add chat history if available
+            # Add recent chat history if available
             if chat_history:
-
+                # Only keep recent history to manage memory
+                recent_history = chat_history[-2:] # Keep last 2 exchanges
+                for chat in recent_history:
                    messages.append({
                        "role": "user" if chat["role"] == "user" else "assistant",
                        "content": chat["content"]
                    })
 
-            logger.info("Generating response")
-
+            logger.info("Generating response with basic settings")
+
+            # Generate with conservative parameters
             response = self.pipe(
                 messages,
-                max_new_tokens=
+                max_new_tokens=100, # Conservative token limit
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.95,
-
+                num_beams=1, # Single beam for simplicity
+                pad_token_id=self.tokenizer.pad_token_id
             )[0]["generated_text"]
 
-            # Clean up
+            # Clean up our response
             response = response.split("assistant:")[-1].strip()
 
             logger.info("Response generated successfully")
@@ -255,14 +256,14 @@ class MedicalAssistant:
             logger.error(traceback.format_exc())
             return f"I apologize, but I encountered an error: {str(e)}"
 
-# Initialize
+# Initialize our assistant
 assistant = None
 
 def initialize_assistant():
-    """Initialize the assistant with
+    """Initialize the assistant with careful error handling"""
     global assistant
     try:
-        logger.info("Attempting to initialize assistant")
+        logger.info("Attempting to initialize basic CPU assistant")
         assistant = MedicalAssistant()
         logger.info("Assistant initialized successfully")
         return True
@@ -272,7 +273,7 @@ def initialize_assistant():
        return False
 
 def chat_response(message: str, history: List[Dict]):
-    """Handle chat interactions with error recovery"""
+    """Handle chat interactions with proper error recovery"""
     global assistant
 
     if assistant is None:
@@ -287,14 +288,12 @@ def chat_response(message: str, history: List[Dict]):
        logger.error(traceback.format_exc())
        return f"I encountered an error: {str(e)}"
 
-# Create
+# Create our Gradio interface
 demo = gr.ChatInterface(
     fn=chat_response,
-    title="Medical Assistant (
-    description="""This medical assistant
-
-    guidance and information about health-related queries while
-    maintaining professional medical standards.""",
+    title="Medical Assistant (Basic CPU Version)",
+    description="""This medical assistant provides medical guidance using a basic CPU configuration.
+    Responses may take longer but will be stable and reliable.""",
     examples=[
         "What are the symptoms of malaria?",
         "How can I prevent type 2 diabetes?",
@@ -302,7 +301,7 @@ demo = gr.ChatInterface(
     ]
 )
 
-# Launch
+# Launch our interface
 if __name__ == "__main__":
-    logger.info("Starting the application")
+    logger.info("Starting the basic CPU application")
     demo.launch()
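
A note on the rewritten generation call: when a transformers text-generation pipeline is handed a list of chat messages, as generate_response does here, recent transformers releases return the continued conversation as a list of role/content dicts rather than one flat string, in which case the split("assistant:") cleanup has nothing to split on. The helper below is a minimal sketch, not part of this commit, that handles either output shape; the exact structure depends on the installed transformers version.

def extract_assistant_reply(pipe_output):
    """Sketch only (assumed helper, not in app.py): pull the assistant text out of
    a text-generation pipeline result, covering both the chat-style output of
    recent transformers releases (generated_text is the message list with the
    assistant turn appended) and the older plain-string output."""
    generated = pipe_output[0]["generated_text"]
    if isinstance(generated, list):  # chat-style: list of {"role", "content"} dicts
        return generated[-1]["content"].strip()
    return str(generated).split("assistant:")[-1].strip()  # legacy string output

Calling extract_assistant_reply(self.pipe(messages, ...)) would take the place of the split-based cleanup line.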
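
For trying the change locally, a hypothetical smoke test; the import path assumes the edited file is saved as app.py, and the token value is a placeholder to replace with your own.

# Hypothetical smoke test, not part of this commit.
import os

os.environ.setdefault("HUGGING_FACE_TOKEN", "hf_...")  # placeholder token, supply your own

from app import MedicalAssistant  # assumes the edited file lives at app.py

assistant = MedicalAssistant()
print(assistant.generate_response("What are the symptoms of malaria?"))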