import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from typing import List, Dict
import logging
import traceback

# Set up basic logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

class MedicalAssistant:
    def __init__(self):
        """
        Initialize the medical assistant with the pre-quantized model.
        Designed for a CPU-only environment on Hugging Face's free tier.
        """
        try:
            logger.info("Starting model initialization...")
            
            # Using the pre-quantized model - no need for additional quantization
            self.model_name = "emircanerol/Llama3-Med42-8B-4bit"
            self.max_length = 2048
            
            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )
            
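            # Llama-family tokenizers often ship without a pad token; fall back to EOS
            # so the generation pipeline can pad safely.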
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
                
            logger.info("Loading model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )
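            # Assumption: the 4-bit checkpoint loads on CPU as-is. If memory is tight on
            # the free tier, from_pretrained also accepts low_cpu_mem_usage=True and an
            # explicit torch_dtype, which can reduce peak RAM while loading.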
            
            logger.info("Creating pipeline...")
            self.pipe = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer
            )
            
            logger.info("Initialization completed successfully!")
            
        except Exception as e:
            logger.error(f"Initialization failed: {str(e)}")
            logger.error(traceback.format_exc())
            raise

    def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
        try:
            system_prompt = """You are a medical AI assistant. Provide accurate, 
            professional medical guidance. Always recommend consulting healthcare 
            providers for specific medical advice."""
            
            prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
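            # Alternative (untested sketch): Llama-3-style checkpoints usually ship a
            # chat template, so the prompt could instead be built with
            #   messages = [{"role": "system", "content": system_prompt},
            #               {"role": "user", "content": message}]
            #   prompt = self.tokenizer.apply_chat_template(
            #       messages, tokenize=False, add_generation_prompt=True)
            # assuming this model's tokenizer defines a chat template.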
            
            response = self.pipe(
                prompt,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.pad_token_id
            )[0]["generated_text"]
            
            return response.split("Assistant:")[-1].strip()
            
        except Exception as e:
            logger.error(f"Error during response generation: {str(e)}")
            logger.error(traceback.format_exc())
            return f"I apologize, but I encountered an error: {str(e)}"

# Global assistant instance
assistant = None

def initialize_assistant():
    global assistant
    try:
        logger.info("Attempting to initialize assistant")
        assistant = MedicalAssistant()
        return True
    except Exception as e:
        logger.error(f"Failed to initialize assistant: {str(e)}")
        logger.error(traceback.format_exc())
        return False

def chat_response(message: str, history: List[Dict]):
    global assistant
    
    if assistant is None:
        logger.info("Assistant not initialized, attempting initialization")
        if not initialize_assistant():
            return "I apologize, but I'm currently unavailable. Please try again later."
    
    try:
        return assistant.generate_response(message, history)
    except Exception as e:
        logger.error(f"Error in chat response: {str(e)}")
        logger.error(traceback.format_exc())
        return f"I encountered an error: {str(e)}"

# Create the Gradio interface
demo = gr.ChatInterface(
    fn=chat_response,
    title="NURSEOGE",
    description="This medical assistant provides guidance and information about health-related queries.",
    examples=[
        "What are the symptoms of malaria?",
        "How can I prevent type 2 diabetes?",
        "What should I do for a mild headache?"
    ]
)

# Launch the interface
if __name__ == "__main__":
    logger.info("Starting the application")
    demo.launch()