File size: 4,541 Bytes
b5fe4ce
6ebcdab
 
 
 
55db529
 
3dc4061
3202d1b
3dc4061
 
 
 
 
 
3202d1b
 
 
3dc4061
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8867e8a
b5fe4ce
 
c30f436
3dc4061
b5fe4ce
 
3dc4061
 
237d9d2
bdec0c5
bf9669d
bdec0c5
 
bf9669d
bdec0c5
81395fc
b5fe4ce
 
 
bf07a1e
 
 
 
 
237d9d2
 
bf07a1e
237d9d2
1b29238
3202d1b
bf07a1e
3202d1b
1b29238
bf07a1e
3202d1b
1b29238
81395fc
6c18f3e
1b29238
bf07a1e
1b29238
bf07a1e
 
1b29238
237d9d2
1b29238
483b4b0
b5fe4ce
1b29238
b01335d
1b29238
b01335d
 
 
 
3202d1b
b01335d
 
 
 
1b29238
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
from peft import PeftModel, PeftConfig
import torch
import gradio as gr
import random
from textwrap import wrap

# Functions to Wrap the Prompt Correctly
def wrap_text(text, width=90):
    """Wrap every line of *text* so that no output line exceeds *width*.

    The input is split on newline characters and each piece is re-flowed
    independently, so existing line breaks (including blank lines) survive.

    Args:
        text: The string to wrap.
        width: Maximum length of each output line. Defaults to 90.

    Returns:
        The wrapped text, with all lines joined by newline characters.
    """
    # Only ``wrap`` is imported from textwrap in this file (the module name
    # itself is not bound), so build the result from ``wrap``; joining its
    # output with newlines is what ``textwrap.fill`` does.
    return '\n'.join(
        '\n'.join(wrap(line, width=width)) for line in text.split('\n')
    )

def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:", max_length=512):
    """Generate a reply for *user_input* with the module-level PEFT model.

    The system prompt and the user input are combined into a single
    ``<s>[INST]...[/INST]`` prompt, tokenized with the module-level
    ``tokenizer`` and passed to ``peft_model.generate``.

    Args:
        user_input: The user's question or message.
        system_prompt: Instruction text placed before the user input.
        max_length: Value forwarded to ``generate`` as ``max_length``.
            Defaults to 512.

    Returns:
        The decoded output sequence with special tokens skipped.
    """
    # Combine user input and system prompt
    formatted_input = f"<s>[INST]{system_prompt} {user_input}[/INST]"

    # The BOS marker is already spelled out in the prompt text, so the
    # tokenizer is told not to add special tokens of its own.
    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)

    # Place the inputs wherever the model weights actually live; the model is
    # not explicitly moved to a device in this file, so a separately chosen
    # device string could disagree with it.
    model_inputs = encodeds.to(next(peft_model.parameters()).device)

    # Generate a response using the model
    output = peft_model.generate(
        **model_inputs,
        max_length=max_length,
        use_cache=True,
        early_stopping=True,
        bos_token_id=peft_model.config.bos_token_id,
        eos_token_id=peft_model.config.eos_token_id,
        pad_token_id=peft_model.config.eos_token_id,
        temperature=0.1,
        do_sample=True
    )

    # Decode the first (only) sequence in the batch
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)

    return response_text

# Prefer the GPU when torch can see one, otherwise run on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hub repositories: the base checkpoint and the PEFT adapter applied to it
base_model_id = "HuggingFaceH4/zephyr-7b-beta"
model_directory = "pseudolab/K23_MiniMed"

# The tokenizer is taken from the base checkpoint; EOS is reused as the
# padding token and padding goes on the left
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    padding_side="left",
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

# Read the adapter configuration, then load the base weights and wrap them
# with the adapter in one step
peft_config = PeftConfig.from_pretrained(model_directory)
peft_model = PeftModel.from_pretrained(
    MistralForCausalLM.from_pretrained(base_model_id, trust_remote_code=True),
    model_directory,
)

class ChatBot:
    """Turn a user question plus a system prompt into a generated reply.

    Relies on the module-level ``tokenizer`` and ``peft_model``.
    """

    def __init__(self):
        # Conversation history; starts empty and is not read or updated by
        # predict().
        self.history = []

    def predict(self, user_input, system_prompt="You are an expert medical analyst:"):
        """Generate a reply for *user_input*.

        Args:
            user_input: The user's question or message.
            system_prompt: Instruction text placed before the user input.

        Returns:
            The decoded output sequence with special tokens skipped. The
            whole sequence returned by ``generate`` is decoded, which for a
            causal LM typically includes the prompt text as well.
        """
        # Combine the user's input with the system prompt
        formatted_input = f"<s>[INST]{system_prompt} {user_input}[/INST]"

        # The prompt already starts with the "<s>" marker, so special tokens
        # are not added a second time. Calling the tokenizer (rather than
        # ``encode``) also yields the attention mask that ``generate`` uses.
        encoded = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)

        # Keep the inputs on the same device as the model weights
        encoded = encoded.to(next(peft_model.parameters()).device)

        # Generate a response using the PEFT model
        response = peft_model.generate(**encoded, max_length=512, pad_token_id=tokenizer.eos_token_id)

        # Decode the first (only) sequence in the batch to text
        return tokenizer.decode(response[0], skip_special_tokens=True)

# Single bot instance whose predict() backs the web UI
bot = ChatBot()

# Bilingual (Korean + English) page title, assembled from its two halves
title = (
    "👋🏻토닉의 미스트랄메드 채팅에 오신 것을 환영합니다🚀"
    "👋🏻Welcome to Tonic's MistralMed Chat🚀"
)

# Bilingual page description (Markdown links), Korean first, then English
description = (
    "이 공간을 사용하여 현재 모델을 테스트할 수 있습니다. [pseudolab/K23_MiniMed](https://huggingface.co/pseudolab/K23_MiniMed) 또는 이 공간을 복제하고 로컬 또는 🤗HuggingFace에서 사용할 수 있습니다. [Discord에서 함께 만들기 위해 Discord에 가입하십시오](https://discord.gg/VqTxc76K3u). "
    "You can use this Space to test out the current model [pseudolab/K23_MiniMed](https://huggingface.co/pseudolab/K23_MiniMed) or duplicate this Space and use it locally or on 🤗HuggingFace. [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
)

# One example row: [user input, system prompt]
example_question = "[Question:] What is the proper treatment for buccal herpes?"
example_system_prompt = "You are a medicine and public health expert, you will receive a question, answer the question, and provide a complete answer"
examples = [[example_question, example_system_prompt]]

# Two text inputs (user input and system prompt, in that order), one text output
iface = gr.Interface(
    fn=bot.predict,
    inputs=["text", "text"],
    outputs="text",
    title=title,
    description=description,
    examples=examples,
    theme="ParityError/Anime",
)

iface.launch()