"""
Diabetes Version
@aim: Demo for testing purposes only
@inquiries: Dr M As'ad 
@email: [email protected]
"""

# Import necessary libraries
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from openai import OpenAI
import os
import torch
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
from huggingface_hub import login

# Initialize the OpenAI-compatible client for the Hugging Face Inference API
# (currently unused; generation below runs through a local transformers pipeline)
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1",
    api_key=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
)

api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if api_token:
    login(token=api_token)
else:
    st.error("API token is not set in the environment variables.")

# Define model links
model_links = {
    "HAH-2024-v0.1": "drmasad/HAH-2024-v0.11"
}

# Set selected model
selected_model = "HAH-2024-v0.1"

# Display welcome message
st.title("Welcome to HAH-2024-v0.1")

# Sidebar setup
temp_values = st.sidebar.slider("Select a temperature value", min_value=0.0, max_value=1.0, value=0.5)
def reset_conversation():
    st.session_state.conversation = []
    st.session_state.messages = []

st.sidebar.button("Reset Chat", on_click=reset_conversation)
st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.image("https://www.hmgaihub.com/untitled.png")
st.sidebar.markdown("*Generated content may be inaccurate or false.*")
st.sidebar.markdown("*This is an under development project.*")
st.sidebar.markdown("*Not a replacement for medical advice from a doctor.*")

# Loading message placeholder
loading_message = st.empty()

# Cache the loaded model so it is created only once across Streamlit reruns
@st.cache_resource
def load_model(selected_model_name):
    loading_message.info("Loading the model, please wait...")
    model_name = model_links[selected_model_name]

    # Define the BitsAndBytesConfig for quantization
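    # NF4 4-bit weights with bfloat16 compute keep the 7B model within a
    # single-GPU memory budget; fp32 CPU offload covers layers that do not fit.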
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
        llm_int8_enable_fp32_cpu_offload=True,
    )

    # Load the model with quantization settings directly applied
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        trust_remote_code=True,
    )

    # Disable the KV cache and prepare the model for k-bit training
    # (training-time settings; not strictly needed for inference-only use)
    model.config.use_cache = False
    model = prepare_model_for_kbit_training(model)

    # Configure a LoRA adapter. Note: get_peft_model attaches a freshly
    # initialized adapter; it does not load previously fine-tuned LoRA weights.
    peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0.1,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
    )
    model = get_peft_model(model, peft_config)

    # Use the base Mistral-7B-Instruct tokenizer (the fine-tuned checkpoint is
    # assumed to share its vocabulary)
    tokenizer = AutoTokenizer.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=True
    )

    # Replace the loading message with a success note
    loading_message.success("Model is ready. You can start chatting now.")

    return model, tokenizer


# Load model and tokenizer
model, tokenizer = load_model(selected_model)

# Chat application logic
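# Streamlit reruns this script on every interaction, so the chat history is
# kept in st.session_state and replayed below before handling the new prompt.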
if "messages" not in st.session_state:
    st.session_state.messages = []

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask me anything about diabetes"):
    with st.chat_message("user"):
        st.markdown(prompt)

    st.session_state.messages.append({"role": "user", "content": prompt})

    instructions = """
    Act as a highly knowledgeable doctor with special interest in diabetes, skilled at explaining complex medical information in a way that is easy to understand for patients without a medical background. Your responses should not only demonstrate empathy and care but also uphold a high standard of medical accuracy and reliability. Respond precisely to what the patient needs in a professional, accurate, and reassuring manner, avoiding any unnecessary information.
    """

    full_prompt = f"<s>[INST] {prompt} [/INST] {instructions}</s>"

    with st.chat_message("assistant"):
        # Build a text-generation pipeline around the quantized model.
        # Sampling is enabled only for temperatures above zero; otherwise
        # decoding is greedy and the slider value is ignored.
        generator = pipeline(
            task="text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=1024,
            do_sample=temp_values > 0,
            temperature=temp_values if temp_values > 0 else 1.0,
        )
        result = generator(full_prompt)

        # The pipeline output contains the prompt plus the completion; keep
        # only the text generated after the [/INST] marker.
        generated_text = result[0]["generated_text"]
        response = generated_text.split("[/INST]")[-1].strip()

        st.markdown(response)

    st.session_state.messages.append({"role": "assistant", "content": response})