Update app.py
app.py CHANGED
@@ -147,13 +147,13 @@
 
 import os
 import gradio as gr
-from transformers import AutoTokenizer, pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 from typing import List, Dict
 import logging
 import traceback
 
-#
+# Set up logging to help us track what's happening
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
@@ -163,39 +163,48 @@ logger = logging.getLogger(__name__)
 class MedicalAssistant:
     def __init__(self):
         """
-        Initialize the medical assistant
-
+        Initialize the medical assistant with CPU-friendly settings.
+        We'll use careful memory management and avoid GPU-specific features.
         """
         try:
             logger.info("Starting model initialization...")
 
-            #
+            # Model configuration
             self.model_name = "emircanerol/Llama3-Med42-8B-4bit"
             self.max_length = 2048
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
-            #
-            logger.info(
-
-
-
+            # First load the tokenizer as it's lighter on memory
+            logger.info("Loading tokenizer...")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                trust_remote_code=True
+            )
+
+            # Handle padding token
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            logger.info("Tokenizer loaded successfully")
 
-            #
-            logger.info("
+            # Load model with CPU-friendly settings
+            logger.info("Loading model - this may take a few minutes...")
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_name,
+                torch_dtype=torch.float32,  # Use float32 for CPU
+                low_cpu_mem_usage=True,
+                trust_remote_code=True
+            )
+
+            # Create the pipeline with our loaded components
+            logger.info("Creating pipeline...")
             self.pipe = pipeline(
                 "text-generation",
-                model=self.model_name,
-
-
+                model=self.model,
+                tokenizer=self.tokenizer,
+                device=-1,  # Force CPU usage
+                torch_dtype=torch.float32
             )
-            logger.info("Pipeline initialized successfully!")
 
-
-            logger.info("Loading tokenizer...")
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-            if self.tokenizer.pad_token is None:
-                self.tokenizer.pad_token = self.tokenizer.eos_token
-            logger.info("Tokenizer loaded successfully!")
+            logger.info("Initialization completed successfully!")
 
         except Exception as e:
             logger.error(f"Initialization failed: {str(e)}")
@@ -205,40 +214,33 @@ class MedicalAssistant:
     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
         """
         Generate a response using the text generation pipeline.
-
+        Includes careful error handling and response processing.
         """
         try:
             logger.info("Preparing message for generation")
 
-            #
-            system_prompt = """You are a medical AI assistant.
-
-
-
-            # Format messages for the model
-            messages = [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": message}
-            ]
+            # Create a medical context-aware prompt
+            system_prompt = """You are a medical AI assistant. Provide accurate,
+            professional medical guidance. Always recommend consulting healthcare
+            providers for specific medical advice."""
 
-            #
-            prompt =
-            prompt += "\nassistant:"
+            # Format the conversation
+            prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
 
             logger.info("Generating response")
-            # Generate
+            # Generate with conservative settings for CPU
             response = self.pipe(
                 prompt,
-                max_new_tokens=
+                max_new_tokens=256,  # Reduced for CPU efficiency
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.95,
-
+                num_return_sequences=1,
                 pad_token_id=self.tokenizer.pad_token_id
             )[0]["generated_text"]
 
-            #
-            response = response.split("
+            # Clean up the response
+            response = response.split("Assistant:")[-1].strip()
 
             logger.info("Response generated successfully")
             return response
@@ -248,14 +250,11 @@ class MedicalAssistant:
             logger.error(traceback.format_exc())
             return f"I apologize, but I encountered an error: {str(e)}"
 
-#
+# Global assistant instance
 assistant = None
 
 def initialize_assistant():
-    """
-    Initialize the assistant with error handling and logging.
-    This helps us track any issues during startup.
-    """
+    """Initialize the assistant with proper error handling"""
     global assistant
     try:
         logger.info("Attempting to initialize assistant")
@@ -268,15 +267,13 @@ def initialize_assistant():
         return False
 
 def chat_response(message: str, history: List[Dict]):
-    """
-    Handle chat messages and maintain conversation context.
-    """
+    """Handle chat interactions with error recovery"""
    global assistant
 
     if assistant is None:
         logger.info("Assistant not initialized, attempting initialization")
         if not initialize_assistant():
-            return "I apologize, but I'm currently unavailable."
+            return "I apologize, but I'm currently unavailable. Please try again later."
 
     try:
         return assistant.generate_response(message, history)
@@ -285,12 +282,13 @@ def chat_response(message: str, history: List[Dict]):
         logger.error(traceback.format_exc())
         return f"I encountered an error: {str(e)}"
 
-# Create the Gradio interface
+# Create the Gradio interface
 demo = gr.ChatInterface(
     fn=chat_response,
-    title="Medical Assistant (
-    description="""This medical assistant
-
+    title="Medical Assistant (CPU Version)",
+    description="""This medical assistant provides guidance and information
+                   about health-related queries. Note that this is running
+                   in CPU mode for broader compatibility.""",
     examples=[
         "What are the symptoms of malaria?",
         "How can I prevent type 2 diabetes?",
@@ -298,7 +296,7 @@ demo = gr.ChatInterface(
     ]
 )
 
-# Launch the
+# Launch the interface
 if __name__ == "__main__":
     logger.info("Starting the application")
     demo.launch()
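For a quick sanity check of the updated CPU path, a minimal smoke test can drive the new code without the Gradio UI. This is a sketch, not part of the commit: it assumes the file above is saved as app.py, that transformers, torch, and gradio are installed, and that the emircanerol/Llama3-Med42-8B-4bit checkpoint is reachable.

# Hypothetical smoke test; mirrors the wiring in app.py but is not in the commit.
from app import initialize_assistant, chat_response

if initialize_assistant():
    # The visible diff never folds history into the prompt, so [] is enough here.
    print(chat_response("What are the symptoms of malaria?", []))
else:
    print("Initialization failed; see the output configured by logging.basicConfig.")

Importing app builds the ChatInterface but does not launch it, since demo.launch() sits behind the __main__ guard.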
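One design note on the prompt change: the commit drops the role-based messages list in favor of a hand-rolled User:/Assistant: string and then splits the output on "Assistant:". Llama-3-derived checkpoints typically ship a chat template, so an alternative sketch inside generate_response could delegate the formatting to the tokenizer, assuming this particular checkpoint's tokenizer defines one:

# Hypothetical alternative to the f-string prompt; assumes a chat template exists.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": message},
]
prompt = self.tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a plain string for the pipeline
    add_generation_prompt=True   # append the assistant header so the model answers
)

With this approach the split on "Assistant:" no longer applies; passing return_full_text=False to the pipeline call is the usual way to drop the echoed prompt from the output.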