benardo0 committed
Commit d40e9bd · verified · 1 Parent(s): 10ffd90

Update app.py

Files changed (1)
  1. app.py +32 -213
app.py CHANGED
@@ -1,159 +1,11 @@
-# import os
-# import gradio as gr
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import torch
-# from typing import List, Dict
-# import logging
-
-# # Set up logging to help us debug model loading and inference
-# logging.basicConfig(level=logging.INFO)
-# logger = logging.getLogger(__name__)
-
-# class MedicalAssistant:
-#     def __init__(self):
-#         """Initialize the medical assistant with model and tokenizer"""
-#         try:
-#             logger.info("Starting model initialization...")
-
-#             # Model configuration - adjust these based on your available compute
-#             self.model_name = "mradermacher/Llama3-Med42-8B-GGUF"
-#             self.max_length = 1048
-#             self.device = "cuda" if torch.cuda.is_available() else "cpu"
-
-#             logger.info(f"Using device: {self.device}")
-
-#             # Load tokenizer first - this is typically faster and can catch issues early
-#             logger.info("Loading tokenizer...")
-#             self.tokenizer = AutoTokenizer.from_pretrained(
-#                 self.model_name,
-#                 padding_side="left",
-#                 trust_remote_code=True
-#             )
-
-#             # Set padding token if not set
-#             if self.tokenizer.pad_token is None:
-#                 self.tokenizer.pad_token = self.tokenizer.eos_token
-
-#             # Load model with memory optimizations
-#             logger.info("Loading model...")
-#             self.model = AutoModelForCausalLM.from_pretrained(
-#                 self.model_name,
-#                 torch_dtype=torch.float16,
-#                 device_map="auto",
-#                 load_in_8bit=True,
-#                 trust_remote_code=True
-#             )
-
-#             logger.info("Model initialization completed successfully!")
-
-#         except Exception as e:
-#             logger.error(f"Error during initialization: {str(e)}")
-#             raise
-
-#     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
-#         """Generate a response to the user's message"""
-#         try:
-#             # Prepare the prompt
-#             system_prompt = """You are a medical AI assistant. Respond to medical queries
-#             professionally and accurately. If you're unsure, always recommend consulting
-#             with a healthcare provider."""
-
-#             # Combine system prompt, chat history, and current message
-#             full_prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
-
-#             # Tokenize input
-#             inputs = self.tokenizer(
-#                 full_prompt,
-#                 return_tensors="pt",
-#                 padding=True,
-#                 truncation=True,
-#                 max_length=self.max_length
-#             ).to(self.device)
-
-#             # Generate response
-#             with torch.no_grad():
-#                 outputs = self.model.generate(
-#                     **inputs,
-#                     max_new_tokens=512,
-#                     do_sample=True,
-#                     temperature=0.7,
-#                     top_p=0.95,
-#                     pad_token_id=self.tokenizer.pad_token_id,
-#                     repetition_penalty=1.1
-#                 )
-
-#             # Decode and clean up response
-#             response = self.tokenizer.decode(
-#                 outputs[0],
-#                 skip_special_tokens=True
-#             )
-
-#             # Extract just the assistant's response
-#             response = response.split("Assistant:")[-1].strip()
-
-#             return response
-
-#         except Exception as e:
-#             logger.error(f"Error during response generation: {str(e)}")
-#             return f"I apologize, but I encountered an error. Please try again."
-
-# # Initialize the assistant
-# assistant = None
-
-# def initialize_assistant():
-#     """Initialize the assistant and handle any errors"""
-#     global assistant
-#     try:
-#         assistant = MedicalAssistant()
-#         return True
-#     except Exception as e:
-#         logger.error(f"Failed to initialize assistant: {str(e)}")
-#         return False
-
-# def chat_response(message: str, history: List[Dict]):
-#     """Handle chat messages and return responses"""
-#     global assistant
-
-#     # Check if assistant is initialized
-#     if assistant is None:
-#         if not initialize_assistant():
-#             return "I apologize, but I'm currently unavailable. Please try again later."
-
-#     try:
-#         return assistant.generate_response(message, history)
-#     except Exception as e:
-#         logger.error(f"Error in chat response: {str(e)}")
-#         return "I encountered an error. Please try again."
-
-# # Create Gradio interface
-# demo = gr.ChatInterface(
-#     fn=chat_response,
-#     title="Medical Assistant (Test Version)",
-#     description="""This is a test version of the medical assistant.
-#     Please use it to verify basic functionality.""",
-#     examples=[
-#         "What are the symptoms of malaria?",
-#         "How can I prevent type 2 diabetes?",
-#         "What should I do for a mild headache?"
-#     ],
-#     # retry_btn=None,
-#     # undo_btn=None,
-#     # clear_btn="Clear"
-# )
-
-# # Launch the interface
-# if __name__ == "__main__":
-#     demo.launch()
-
 import os
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import torch
 from typing import List, Dict
 import logging
 import traceback
 
-# Set up logging to help us understand what's happening in our application
+# Set up basic logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
@@ -163,40 +15,39 @@ logger = logging.getLogger(__name__)
 class MedicalAssistant:
     def __init__(self):
         """
-        Initialize a basic medical assistant for CPU-only environments.
-        This version uses standard model loading without quantization for maximum compatibility.
+        Initialize the medical assistant with the pre-quantized model.
+        Designed for CPU-only environment on Hugging Face's free tier.
         """
         try:
-            logger.info("Starting basic model initialization...")
+            logger.info("Starting model initialization...")
 
-            # Define our model configuration
+            # Using the pre-quantized model - no need for additional quantization
            self.model_name = "emircanerol/Llama3-Med42-8B-4bit"
             self.max_length = 2048
 
-            # First load the tokenizer since it's lighter on memory
             logger.info("Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.model_name,
-                token=os.getenv('HUGGING_FACE_TOKEN')
+                trust_remote_code=True
             )
 
-            # Handle padding token setup
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            logger.info("Loading model...")
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_name,
+                trust_remote_code=True
+            )
 
-            # Initialize pipeline with basic CPU settings
-            logger.info("Initializing CPU-based pipeline...")
+            logger.info("Creating pipeline...")
             self.pipe = pipeline(
                 "text-generation",
-                model=self.model_name,
-                token=os.getenv('HUGGING_FACE_TOKEN'),
-                device_map="cpu",  # Explicitly use CPU
-                torch_dtype=torch.float32,  # Use standard precision
-                use_safetensors=True,  # Enable safetensors for better memory handling
-                # Removed all quantization settings
+                model=self.model,
+                tokenizer=self.tokenizer
             )
-
-            logger.info("Medical Assistant initialized successfully in basic CPU mode!")
+
+            logger.info("Initialization completed successfully!")
 
         except Exception as e:
             logger.error(f"Initialization failed: {str(e)}")
@@ -204,68 +55,38 @@ class MedicalAssistant:
             raise
 
     def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
-        """
-        Generate responses using basic CPU-friendly settings.
-        This method focuses on stability over speed, using conservative parameters.
-        """
         try:
-            logger.info("Preparing message for generation")
-
-            # Create our medical context prompt
-            system_prompt = """You are a medical AI assistant trained on medical knowledge.
-            Provide accurate, professional medical guidance while acknowledging limitations.
-            Always recommend consulting healthcare providers for specific medical advice."""
+            system_prompt = """You are a medical AI assistant. Provide accurate,
+            professional medical guidance. Always recommend consulting healthcare
+            providers for specific medical advice."""
 
-            # Format our conversation for the model
-            messages = [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": message}
-            ]
-
-            # Add recent chat history if available
-            if chat_history:
-                # Only keep recent history to manage memory
-                recent_history = chat_history[-2:]  # Keep last 2 exchanges
-                for chat in recent_history:
-                    messages.append({
-                        "role": "user" if chat["role"] == "user" else "assistant",
-                        "content": chat["content"]
-                    })
-
-            logger.info("Generating response with basic settings")
 
-            # Generate with conservative parameters
+            prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
+
             response = self.pipe(
-                messages,
-                max_new_tokens=100,  # Conservative token limit
+                prompt,
+                max_new_tokens=256,
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.95,
-                num_beams=1,  # Single beam for simplicity
+                num_return_sequences=1,
                 pad_token_id=self.tokenizer.pad_token_id
             )[0]["generated_text"]
 
-            # Clean up our response
-            response = response.split("assistant:")[-1].strip()
-
-            logger.info("Response generated successfully")
-            return response
+            return response.split("Assistant:")[-1].strip()
 
         except Exception as e:
             logger.error(f"Error during response generation: {str(e)}")
             logger.error(traceback.format_exc())
             return f"I apologize, but I encountered an error: {str(e)}"
 
-# Initialize our assistant
+# Global assistant instance
 assistant = None
 
 def initialize_assistant():
-    """Initialize the assistant with careful error handling"""
     global assistant
     try:
-        logger.info("Attempting to initialize basic CPU assistant")
+        logger.info("Attempting to initialize assistant")
         assistant = MedicalAssistant()
-        logger.info("Assistant initialized successfully")
         return True
     except Exception as e:
         logger.error(f"Failed to initialize assistant: {str(e)}")
@@ -273,7 +94,6 @@ def initialize_assistant():
         return False
 
 def chat_response(message: str, history: List[Dict]):
-    """Handle chat interactions with proper error recovery"""
     global assistant
 
     if assistant is None:
@@ -288,12 +108,11 @@ def chat_response(message: str, history: List[Dict]):
         logger.error(traceback.format_exc())
         return f"I encountered an error: {str(e)}"
 
-# Create our Gradio interface
+# Create the Gradio interface
 demo = gr.ChatInterface(
     fn=chat_response,
-    title="Medical Assistant (Basic CPU Version)",
-    description="""This medical assistant provides medical guidance using a basic CPU configuration.
-    Responses may take longer but will be stable and reliable.""",
+    title="NURSEOGE",
+    description="This medical assistant provides guidance and information about health-related queries.",
     examples=[
         "What are the symptoms of malaria?",
         "How can I prevent type 2 diabetes?",
@@ -301,7 +120,7 @@ demo = gr.ChatInterface(
     ]
 )
 
-# Launch our interface
+# Launch the interface
 if __name__ == "__main__":
-    logger.info("Starting the basic CPU application")
+    logger.info("Starting the application")
     demo.launch()
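
Review note: the commit replaces the string-based pipeline setup (which pinned the model to CPU with float32 and re-resolved the checkpoint by name) with an explicit AutoModelForCausalLM/AutoTokenizer load whose objects are handed to the pipeline. A minimal sketch of how the refactored class could be exercised outside Gradio, assuming transformers and its dependencies are installed and the emircanerol/Llama3-Med42-8B-4bit checkpoint is reachable; this mirrors the committed code rather than adding behavior:

# Hedged usage sketch; not part of the commit. Importing app runs the
# module-level setup but does not start the UI, since demo.launch() is
# guarded by __name__ == "__main__".
from app import MedicalAssistant

assistant = MedicalAssistant()  # loads tokenizer, model, and pipeline
print(assistant.generate_response("What are the symptoms of malaria?"))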
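Design note: generate_response now builds a plain f-string prompt ("User: ... Assistant:") instead of the earlier messages list, and chat_history is accepted but no longer folded into the prompt. For Llama-3-family checkpoints that ship a chat template, template-based formatting is the other common option; the sketch below shows that alternative for comparison only, assuming this tokenizer actually provides a chat template — it is not what the commit does:

# Hedged alternative sketch, assuming the tokenizer ships a chat template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("emircanerol/Llama3-Med42-8B-4bit")
messages = [
    {"role": "system", "content": "You are a medical AI assistant."},
    {"role": "user", "content": "What are the symptoms of malaria?"},
]
# Renders the conversation with the model's own special tokens and appends
# the assistant header so generation starts in the right place.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)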
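Compatibility note: chat_response is annotated as receiving history: List[Dict], but gr.ChatInterface only delivers history in that role/content dict shape when constructed with type="messages" in recent Gradio releases; older defaults pass (message, response) tuples. A minimal sketch of that assumption — the type="messages" argument is an inference about the intended configuration, not something this commit sets:

# Hedged sketch; type="messages" is an assumption, not part of the commit.
import gradio as gr

def chat_response(message, history):
    # history: list of {"role": ..., "content": ...} dicts under type="messages"
    return f"Echo: {message}"

demo = gr.ChatInterface(fn=chat_response, type="messages", title="NURSEOGE")

if __name__ == "__main__":
    demo.launch()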