import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from typing import List, Dict
import logging
import traceback

# Set up basic logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class MedicalAssistant:
    """Wraps a pre-quantized Llama3-Med42 model behind a simple
    text-generation interface suitable for a CPU-only Gradio Space."""

    def __init__(self):
        """
        Initialize the medical assistant with the pre-quantized model.
        Designed for CPU-only environment on Hugging Face's free tier.

        Raises:
            Exception: re-raised after logging if tokenizer/model/pipeline
                loading fails, so callers can report unavailability.
        """
        try:
            logger.info("Starting model initialization...")

            # Using the pre-quantized model - no need for additional quantization
            self.model_name = "emircanerol/Llama3-Med42-8B-4bit"
            # NOTE(review): max_length is currently unused — generation is
            # bounded by max_new_tokens below. Kept for interface stability.
            self.max_length = 2048

            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )
            # Some causal LMs ship without a pad token; fall back to EOS so
            # the pipeline can pad/batch without erroring.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            logger.info("Loading model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )

            logger.info("Creating pipeline...")
            self.pipe = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer
            )

            logger.info("Initialization completed successfully!")
        except Exception as e:
            logger.error(f"Initialization failed: {str(e)}")
            logger.error(traceback.format_exc())
            raise

    def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
        """
        Generate a single assistant reply for *message*.

        Args:
            message: The user's latest utterance.
            chat_history: Optional prior turns. Assumed to be a list of
                {"role": ..., "content": ...} dicts as produced by
                gr.ChatInterface in messages format — TODO confirm against
                the installed Gradio version. Previously this parameter was
                accepted but ignored; it is now folded into the prompt so
                the model sees conversational context.

        Returns:
            The generated reply text, or an apologetic error string if
            generation fails.
        """
        try:
            system_prompt = """You are a medical AI assistant. Provide accurate, professional medical guidance. 
Always recommend consulting healthcare providers for specific medical advice."""

            # Rebuild the conversation transcript so the model has context.
            # Malformed history entries are skipped rather than crashing.
            history_lines = []
            for turn in (chat_history or []):
                if not isinstance(turn, dict):
                    continue
                role = turn.get("role")
                content = turn.get("content", "")
                if role == "user":
                    history_lines.append(f"User: {content}")
                elif role == "assistant":
                    history_lines.append(f"Assistant: {content}")

            transcript = "\n".join(history_lines)
            if transcript:
                prompt = f"{system_prompt}\n\n{transcript}\nUser: {message}\nAssistant:"
            else:
                prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"

            response = self.pipe(
                prompt,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.pad_token_id
            )[0]["generated_text"]

            # The pipeline echoes the prompt; keep only the text after the
            # final "Assistant:" marker.
            return response.split("Assistant:")[-1].strip()
        except Exception as e:
            logger.error(f"Error during response generation: {str(e)}")
            logger.error(traceback.format_exc())
            # NOTE(review): this exposes raw exception text to the end user;
            # consider a generic message in production.
            return f"I apologize, but I encountered an error: {str(e)}"


# Global assistant instance, created lazily on first chat request so the
# Space can start serving the UI before the model finishes loading.
assistant = None


def initialize_assistant():
    """Create the global MedicalAssistant; return True on success."""
    global assistant
    try:
        logger.info("Attempting to initialize assistant")
        assistant = MedicalAssistant()
        return True
    except Exception as e:
        logger.error(f"Failed to initialize assistant: {str(e)}")
        logger.error(traceback.format_exc())
        return False


def chat_response(message: str, history: List[Dict]):
    """Gradio callback: lazily initialize the assistant, then answer."""
    global assistant
    if assistant is None:
        logger.info("Assistant not initialized, attempting initialization")
        if not initialize_assistant():
            return "I apologize, but I'm currently unavailable. Please try again later."
    try:
        return assistant.generate_response(message, history)
    except Exception as e:
        logger.error(f"Error in chat response: {str(e)}")
        logger.error(traceback.format_exc())
        return f"I encountered an error: {str(e)}"


# Create the Gradio interface
demo = gr.ChatInterface(
    fn=chat_response,
    title="NURSEOGE",
    description="This medical assistant provides guidance and information about health-related queries.",
    examples=[
        "What are the symptoms of malaria?",
        "How can I prevent type 2 diabetes?",
        "What should I do for a mild headache?"
    ]
)

# Launch the interface
if __name__ == "__main__":
    logger.info("Starting the application")
    demo.launch()