# app.py — "NURSEOGE" medical chat assistant (Gradio UI over Llama3-Med42-8B-4bit).
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from typing import List, Dict
import logging
import traceback
# Configure root logging once at import time so every module logger
# inherits the INFO level and a timestamped message format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)
class MedicalAssistant:
    """Chat wrapper around the pre-quantized Llama3-Med42 model.

    Loads the tokenizer, model and text-generation pipeline once at
    construction and exposes :meth:`generate_response` for answering
    user messages. Designed for CPU-only environments (Hugging Face
    free tier) — the checkpoint is already 4-bit quantized, so no
    extra quantization step is performed here.
    """

    def __init__(self):
        """Load tokenizer, model and pipeline for the quantized checkpoint.

        Raises:
            Exception: any loading failure is logged (with traceback)
                and re-raised so callers know initialization failed.
        """
        try:
            logger.info("Starting model initialization...")
            # Pre-quantized model - no need for additional quantization.
            self.model_name = "emircanerol/Llama3-Med42-8B-4bit"
            self.max_length = 2048

            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )
            # Some causal-LM tokenizers ship without a pad token; fall back
            # to EOS so the generation pipeline can pad correctly.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            logger.info("Loading model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )

            logger.info("Creating pipeline...")
            self.pipe = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer
            )
            logger.info("Initialization completed successfully!")
        except Exception as e:
            logger.error(f"Initialization failed: {str(e)}")
            logger.error(traceback.format_exc())
            raise

    def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
        """Generate an assistant reply to *message*.

        Args:
            message: The user's latest message.
            chat_history: Optional prior conversation turns. BUG FIX: this
                parameter was previously accepted but silently ignored, so
                the model never saw earlier turns; it is now folded into
                the prompt. Entries are assumed to be
                ``{"role": ..., "content": ...}`` dicts (Gradio "messages"
                format) — TODO confirm against the Gradio version in use;
                non-dict entries are skipped defensively.

        Returns:
            The generated reply text, or an apology string containing the
            error message if generation fails (errors are never raised).
        """
        try:
            # Built from string fragments so the text is identical to the
            # original triple-quoted prompt (including its line breaks).
            system_prompt = (
                "You are a medical AI assistant. Provide accurate,\n"
                "professional medical guidance. Always recommend consulting healthcare\n"
                "providers for specific medical advice."
            )

            # Fold prior turns into the prompt (previously dropped).
            history_text = ""
            for turn in (chat_history or []):
                if isinstance(turn, dict):
                    role = str(turn.get("role", "user")).capitalize()
                    content = turn.get("content", "")
                    history_text += f"\n{role}: {content}"

            prompt = f"{system_prompt}{history_text}\n\nUser: {message}\nAssistant:"

            response = self.pipe(
                prompt,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.pad_token_id
            )[0]["generated_text"]

            # The pipeline echoes the prompt; keep only the text after the
            # final "Assistant:" marker.
            return response.split("Assistant:")[-1].strip()
        except Exception as e:
            logger.error(f"Error during response generation: {str(e)}")
            logger.error(traceback.format_exc())
            return f"I apologize, but I encountered an error: {str(e)}"
# Module-level singleton; created lazily on the first chat request.
assistant = None


def initialize_assistant():
    """Create the global MedicalAssistant instance.

    Returns True on success; on any failure, logs the error with a full
    traceback and returns False instead of raising.
    """
    global assistant
    try:
        logger.info("Attempting to initialize assistant")
        assistant = MedicalAssistant()
    except Exception as exc:
        logger.error(f"Failed to initialize assistant: {str(exc)}")
        logger.error(traceback.format_exc())
        return False
    return True
def chat_response(message: str, history: List[Dict]):
    """Gradio chat callback: answer *message*, lazily loading the model.

    If the global assistant is missing and cannot be initialized, or if
    generation raises, a user-facing apology string is returned instead
    of propagating the error to the UI.
    """
    global assistant

    # Lazy initialization: the model is only loaded on the first request.
    if assistant is None:
        logger.info("Assistant not initialized, attempting initialization")
        if not initialize_assistant():
            return "I apologize, but I'm currently unavailable. Please try again later."

    try:
        reply = assistant.generate_response(message, history)
    except Exception as exc:
        logger.error(f"Error in chat response: {str(exc)}")
        logger.error(traceback.format_exc())
        return f"I encountered an error: {str(exc)}"
    return reply
# Build the Gradio chat UI: ChatInterface wires chat_response(message,
# history) to a chat widget and shows the examples as clickable starters.
demo = gr.ChatInterface(
    fn=chat_response,
    title="NURSEOGE",
    description="This medical assistant provides guidance and information about health-related queries.",
    examples=[
        "What are the symptoms of malaria?",
        "How can I prevent type 2 diabetes?",
        "What should I do for a mild headache?",
    ],
)
# Launch the interface only when executed as a script (not on import).
if __name__ == "__main__":
    logger.info("Starting the application")
    demo.launch()