Spaces:

benardo0
/

Nurses

Running

App Files Files Community

benardo0 commited on Jan 22

Commit

7625d6b

verified ·

1 Parent(s): 596865f

Update app.py

Browse files

Files changed (1) hide show

app.py +212 -45

app.py CHANGED Viewed

@@ -1,12 +1,163 @@
 import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from typing import List, Dict
 import logging
-# Set up logging to help us debug model loading and inference
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class MedicalAssistant:
@@ -15,62 +166,81 @@ class MedicalAssistant:
         try:
             logger.info("Starting model initialization...")
-            # Model configuration - adjust these based on your available compute
             self.model_name = "mradermacher/Llama3-Med42-8B-GGUF"
-            self.max_length = 1048
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
             logger.info(f"Using device: {self.device}")
-            # Load tokenizer first - this is typically faster and can catch issues early
-            logger.info("Loading tokenizer...")
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_name,
-                padding_side="left",
-                trust_remote_code=True
-            )
             # Set padding token if not set
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
-            # Load model with memory optimizations
-            logger.info("Loading model...")
-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.model_name,
-                torch_dtype=torch.float16,
-                device_map="auto",
-                load_in_8bit=True,
-                trust_remote_code=True
-            )
-            logger.info("Model initialization completed successfully!")
         except Exception as e:
-            logger.error(f"Error during initialization: {str(e)}")
             raise
     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
         """Generate a response to the user's message"""
         try:
             # Prepare the prompt
             system_prompt = """You are a medical AI assistant. Respond to medical queries
             professionally and accurately. If you're unsure, always recommend consulting
             with a healthcare provider."""
-            # Combine system prompt, chat history, and current message
             full_prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
-            # Tokenize input
             inputs = self.tokenizer(
                 full_prompt,
                 return_tensors="pt",
                 padding=True,
                 truncation=True,
                 max_length=self.max_length
-            ).to(self.device)
-            # Generate response
             with torch.no_grad():
                 outputs = self.model.generate(
                     **inputs,
@@ -82,65 +252,62 @@ class MedicalAssistant:
                     repetition_penalty=1.1
                 )
-            # Decode and clean up response
-            response = self.tokenizer.decode(
-                outputs[0],
-                skip_special_tokens=True
-            )
-            # Extract just the assistant's response
             response = response.split("Assistant:")[-1].strip()
             return response
         except Exception as e:
             logger.error(f"Error during response generation: {str(e)}")
-            return f"I apologize, but I encountered an error. Please try again."
-# Initialize the assistant
 assistant = None
 def initialize_assistant():
     """Initialize the assistant and handle any errors"""
     global assistant
     try:
         assistant = MedicalAssistant()
         return True
     except Exception as e:
         logger.error(f"Failed to initialize assistant: {str(e)}")
         return False
 def chat_response(message: str, history: List[Dict]):
     """Handle chat messages and return responses"""
     global assistant
-    # Check if assistant is initialized
     if assistant is None:
         if not initialize_assistant():
-            return "I apologize, but I'm currently unavailable. Please try again later."
     try:
         return assistant.generate_response(message, history)
     except Exception as e:
         logger.error(f"Error in chat response: {str(e)}")
-        return "I encountered an error. Please try again."
 # Create Gradio interface
 demo = gr.ChatInterface(
     fn=chat_response,
     title="Medical Assistant (Test Version)",
-    description="""This is a test version of the medical assistant.
-                   Please use it to verify basic functionality.""",
     examples=[
         "What are the symptoms of malaria?",
         "How can I prevent type 2 diabetes?",
         "What should I do for a mild headache?"
-    ],
-    # retry_btn=None,
-    # undo_btn=None,
-    # clear_btn="Clear"
 )
 # Launch the interface
 if __name__ == "__main__":
     demo.launch()

+# import os
+# import gradio as gr
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# import torch
+# from typing import List, Dict
+# import logging
+# # Set up logging to help us debug model loading and inference
+# logging.basicConfig(level=logging.INFO)
+# logger = logging.getLogger(__name__)
+# class MedicalAssistant:
+#     def __init__(self):
+#         """Initialize the medical assistant with model and tokenizer"""
+#         try:
+#             logger.info("Starting model initialization...")
+#             # Model configuration - adjust these based on your available compute
+#             self.model_name = "mradermacher/Llama3-Med42-8B-GGUF"
+#             self.max_length = 1048
+#             self.device = "cuda" if torch.cuda.is_available() else "cpu"
+#             logger.info(f"Using device: {self.device}")
+#             # Load tokenizer first - this is typically faster and can catch issues early
+#             logger.info("Loading tokenizer...")
+#             self.tokenizer = AutoTokenizer.from_pretrained(
+#                 self.model_name,
+#                 padding_side="left",
+#                 trust_remote_code=True
+#             )
+#             # Set padding token if not set
+#             if self.tokenizer.pad_token is None:
+#                 self.tokenizer.pad_token = self.tokenizer.eos_token
+#             # Load model with memory optimizations
+#             logger.info("Loading model...")
+#             self.model = AutoModelForCausalLM.from_pretrained(
+#                 self.model_name,
+#                 torch_dtype=torch.float16,
+#                 device_map="auto",
+#                 load_in_8bit=True,
+#                 trust_remote_code=True
+#             )
+#             logger.info("Model initialization completed successfully!")
+#         except Exception as e:
+#             logger.error(f"Error during initialization: {str(e)}")
+#             raise
+#     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
+#         """Generate a response to the user's message"""
+#         try:
+#             # Prepare the prompt
+#             system_prompt = """You are a medical AI assistant. Respond to medical queries
+#             professionally and accurately. If you're unsure, always recommend consulting
+#             with a healthcare provider."""
+#             # Combine system prompt, chat history, and current message
+#             full_prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
+#             # Tokenize input
+#             inputs = self.tokenizer(
+#                 full_prompt,
+#                 return_tensors="pt",
+#                 padding=True,
+#                 truncation=True,
+#                 max_length=self.max_length
+#             ).to(self.device)
+#             # Generate response
+#             with torch.no_grad():
+#                 outputs = self.model.generate(
+#                     **inputs,
+#                     max_new_tokens=512,
+#                     do_sample=True,
+#                     temperature=0.7,
+#                     top_p=0.95,
+#                     pad_token_id=self.tokenizer.pad_token_id,
+#                     repetition_penalty=1.1
+#                 )
+#             # Decode and clean up response
+#             response = self.tokenizer.decode(
+#                 outputs[0],
+#                 skip_special_tokens=True
+#             )
+#             # Extract just the assistant's response
+#             response = response.split("Assistant:")[-1].strip()
+#             return response
+#         except Exception as e:
+#             logger.error(f"Error during response generation: {str(e)}")
+#             return f"I apologize, but I encountered an error. Please try again."
+# # Initialize the assistant
+# assistant = None
+# def initialize_assistant():
+#     """Initialize the assistant and handle any errors"""
+#     global assistant
+#     try:
+#         assistant = MedicalAssistant()
+#         return True
+#     except Exception as e:
+#         logger.error(f"Failed to initialize assistant: {str(e)}")
+#         return False
+# def chat_response(message: str, history: List[Dict]):
+#     """Handle chat messages and return responses"""
+#     global assistant
+#     # Check if assistant is initialized
+#     if assistant is None:
+#         if not initialize_assistant():
+#             return "I apologize, but I'm currently unavailable. Please try again later."
+#     try:
+#         return assistant.generate_response(message, history)
+#     except Exception as e:
+#         logger.error(f"Error in chat response: {str(e)}")
+#         return "I encountered an error. Please try again."
+# # Create Gradio interface
+# demo = gr.ChatInterface(
+#     fn=chat_response,
+#     title="Medical Assistant (Test Version)",
+#     description="""This is a test version of the medical assistant.
+#                    Please use it to verify basic functionality.""",
+#     examples=[
+#         "What are the symptoms of malaria?",
+#         "How can I prevent type 2 diabetes?",
+#         "What should I do for a mild headache?"
+#     ],
+#     # retry_btn=None,
+#     # undo_btn=None,
+#     # clear_btn="Clear"
+# )
+# # Launch the interface
+# if __name__ == "__main__":
+#     demo.launch()
 import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from typing import List, Dict
 import logging
+import traceback
+# Configure detailed logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 logger = logging.getLogger(__name__)
 class MedicalAssistant:
         try:
             logger.info("Starting model initialization...")
+            # Model configuration
             self.model_name = "mradermacher/Llama3-Med42-8B-GGUF"
+            self.max_length = 2048
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
             logger.info(f"Using device: {self.device}")
+            logger.info(f"Available CUDA devices: {torch.cuda.device_count() if torch.cuda.is_available() else 'None'}")
+            if torch.cuda.is_available():
+                logger.info(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+            # First, verify the model exists
+            logger.info(f"Attempting to load tokenizer from {self.model_name}")
+            try:
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    self.model_name,
+                    trust_remote_code=True
+                )
+                logger.info("Tokenizer loaded successfully")
+            except Exception as e:
+                logger.error(f"Failed to load tokenizer: {str(e)}")
+                logger.error(traceback.format_exc())
+                raise
             # Set padding token if not set
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
+                logger.info("Set padding token to EOS token")
+            # Load model with more conservative settings
+            logger.info("Loading model - this may take a few minutes...")
+            try:
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_name,
+                    torch_dtype=torch.float16,
+                    device_map="auto",
+                    load_in_4bit=True,  # More conservative than 8-bit
+                    trust_remote_code=True,
+                    low_cpu_mem_usage=True
+                )
+                logger.info("Model loaded successfully!")
+            except Exception as e:
+                logger.error(f"Failed to load model: {str(e)}")
+                logger.error(traceback.format_exc())
+                raise
         except Exception as e:
+            logger.error(f"Initialization failed: {str(e)}")
+            logger.error(traceback.format_exc())
             raise
     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
         """Generate a response to the user's message"""
         try:
+            logger.info("Generating response for message")
             # Prepare the prompt
             system_prompt = """You are a medical AI assistant. Respond to medical queries
             professionally and accurately. If you're unsure, always recommend consulting
             with a healthcare provider."""
             full_prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
+            logger.info("Tokenizing input")
             inputs = self.tokenizer(
                 full_prompt,
                 return_tensors="pt",
                 padding=True,
                 truncation=True,
                 max_length=self.max_length
+            )
+            # Move inputs to the correct device
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            logger.info("Generating response")
             with torch.no_grad():
                 outputs = self.model.generate(
                     **inputs,
                     repetition_penalty=1.1
                 )
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             response = response.split("Assistant:")[-1].strip()
+            logger.info("Response generated successfully")
             return response
         except Exception as e:
             logger.error(f"Error during response generation: {str(e)}")
+            logger.error(traceback.format_exc())
+            return f"I apologize, but I encountered an error: {str(e)}"
+# Global variable for the assistant
 assistant = None
 def initialize_assistant():
     """Initialize the assistant and handle any errors"""
     global assistant
     try:
+        logger.info("Attempting to initialize assistant")
         assistant = MedicalAssistant()
+        logger.info("Assistant initialized successfully")
         return True
     except Exception as e:
         logger.error(f"Failed to initialize assistant: {str(e)}")
+        logger.error(traceback.format_exc())
         return False
 def chat_response(message: str, history: List[Dict]):
     """Handle chat messages and return responses"""
     global assistant
     if assistant is None:
+        logger.info("Assistant not initialized, attempting initialization")
         if not initialize_assistant():
+            return "I apologize, but I'm currently unavailable. The error has been logged for investigation."
     try:
         return assistant.generate_response(message, history)
     except Exception as e:
         logger.error(f"Error in chat response: {str(e)}")
+        logger.error(traceback.format_exc())
+        return f"I encountered an error: {str(e)}"
 # Create Gradio interface
 demo = gr.ChatInterface(
     fn=chat_response,
     title="Medical Assistant (Test Version)",
+    description="This is a test version of the medical assistant. Please use it to verify basic functionality.",
     examples=[
         "What are the symptoms of malaria?",
         "How can I prevent type 2 diabetes?",
         "What should I do for a mild headache?"
+    ]
 )
 # Launch the interface
 if __name__ == "__main__":
+    logger.info("Starting the application")
     demo.launch()