File size: 5,033 Bytes
6ebcdab
 
 
 
55db529
 
3dc4061
3202d1b
3dc4061
 
 
 
 
 
3202d1b
3dc4061
3202d1b
3dc4061
3202d1b
3dc4061
 
b1cc2f7
3dc4061
3202d1b
b1cc2f7
3dc4061
 
 
 
 
 
 
 
b1cc2f7
3dc4061
7da94d2
3dc4061
 
 
b1cc2f7
3dc4061
 
 
 
 
 
 
 
 
 
 
8867e8a
6b13747
054ea8d
c30f436
3dc4061
6b13747
7d0b03f
3dc4061
 
237d9d2
054ea8d
0caf514
6b13747
47ca87a
6b13747
bd0b549
 
bdec0c5
bf9669d
bdec0c5
 
bf9669d
bdec0c5
81395fc
054ea8d
 
 
bf07a1e
 
b1cc2f7
 
237d9d2
1b29238
b1cc2f7
 
 
1b29238
bf07a1e
b1cc2f7
1b29238
b1cc2f7
 
1b29238
bf07a1e
1b29238
b1cc2f7
 
 
 
 
 
1b29238
237d9d2
1b29238
a3eaa33
 
 
b01335d
1b29238
b01335d
 
 
 
3202d1b
b01335d
 
 
 
1b29238
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
from peft import PeftModel, PeftConfig
import torch
import gradio as gr
import random
from textwrap import wrap

# Functions to Wrap the Prompt Correctly
def wrap_text(text, width=90):
    """Wrap each line of *text* to at most *width* columns.

    The text is split on newline characters and every resulting line is
    wrapped on its own, so line breaks already present in the input
    (including empty lines) are kept.

    Args:
        text: The string to wrap.
        width: Maximum line width handed to ``textwrap.fill``.

    Returns:
        The wrapped lines joined back together with newline characters.
    """
    # The file only does `from textwrap import wrap`, so the module name
    # itself is not bound at module level; import it here.
    import textwrap

    return '\n'.join(
        textwrap.fill(line, width=width) for line in text.split('\n')
    )

def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
    """
    Generates text using a large language model, given a user input and a system prompt.

    Relies on the module-level `tokenizer` and `peft_model` objects.

    Args:
        user_input: The user's input text to generate a response for.
        system_prompt: Optional system prompt.
    Returns:
        The decoded text of the first sequence returned by `generate`,
        with special tokens skipped.
    """
    # Combine user input and system prompt in the Falcon-like format
    formatted_input = f"{{{{ {system_prompt} }}}}\nUser: {user_input}\nFalcon:"

    # Encode the input text and place the tensors on the device that holds
    # the model weights, so both sides of generate() agree.
    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
    model_inputs = encodeds.to(peft_model.device)

    # Generate a response. The loaded model is bound to `peft_model`;
    # no object called `model` is defined in this file.
    output = peft_model.generate(
        **model_inputs,
        max_length=500,
        use_cache=True,
        early_stopping=False,
        bos_token_id=peft_model.config.bos_token_id,
        eos_token_id=peft_model.config.eos_token_id,
        pad_token_id=peft_model.config.eos_token_id,
        temperature=0.4,
        do_sample=True
    )

    # Decode the first returned sequence
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)

    return response_text

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hub IDs: the base checkpoint and the PEFT adapter applied on top of it
base_model_id = "tiiuae/falcon-7b-instruct"
model_directory = "Tonic/GaiaMiniMed"

# Instantiate the tokenizer of the base model with left-side padding;
# the EOS token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token

# Load the base model first, then wrap it with the GaiaMiniMed adapter.
# PeftModel.from_pretrained takes the base model instance as its first
# argument and the adapter ID as its second; it cannot be called with the
# adapter ID alone.
peft_model = AutoModelForCausalLM.from_pretrained(base_model_id)
peft_model = PeftModel.from_pretrained(peft_model, model_directory)

# Inputs are sent to `device` before generation, so the weights must be
# placed on the same device.
peft_model = peft_model.to(device)

# The generation code in this file refers to the loaded model as `model`.
model = peft_model

# NOTE(security): the commented-out Mistral loading code that used to sit
# here contained a hard-coded Hugging Face access token. It has been
# removed; that token should be revoked, and any future credential should
# be read from the environment instead of being written into the source.

class ChatBot:
    """Chat front-end around the module-level `tokenizer` and `peft_model`.

    Every call to `predict` builds one Falcon-style prompt, generates a
    completion and records both the prompt and the decoded output in
    `history`.
    """

    def __init__(self, system_prompt="You are an expert medical analyst:"):
        # Default system prompt, used when a call does not supply its own
        self.system_prompt = system_prompt
        # Alternating list of formatted prompts and decoded responses
        self.history = []

    def predict(self, user_input, system_prompt=None, max_length=500):
        """Generate a response for *user_input*.

        Args:
            user_input: The user's message.
            system_prompt: Per-call system prompt. The UI in this file passes
                two text inputs, so this second argument must be accepted.
                When it is None or empty, the instance's `system_prompt`
                is used instead.
            max_length: Value forwarded to `generate` as `max_length`.

        Returns:
            The decoded text of the first sequence returned by `generate`,
            with special tokens skipped.
        """
        active_prompt = system_prompt or self.system_prompt

        # Combine the user's input with the system prompt in Falcon format
        formatted_input = f"{{{{ {active_prompt} }}}}\nUser: {user_input}\nFalcon:"

        # Encode the formatted input (input IDs plus attention mask) and put
        # the tensors on the device that holds the model weights.
        encoded = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
        encoded = encoded.to(peft_model.device)

        # Generate a response. Everything is passed by keyword because the
        # PEFT wrapper's generate() does not accept positional inputs in
        # every release. `early_stopping` is left out: it only applies to
        # beam search, and this call samples with a single beam.
        response = peft_model.generate(
            **encoded,
            max_length=max_length,
            use_cache=True,
            bos_token_id=peft_model.config.bos_token_id,
            eos_token_id=peft_model.config.eos_token_id,
            pad_token_id=peft_model.config.eos_token_id,
            temperature=0.1,
            do_sample=True,
        )

        # Decode the generated response to text
        response_text = tokenizer.decode(response[0], skip_special_tokens=True)

        # Append the Falcon-like conversation to the history
        self.history.append(formatted_input)
        self.history.append(response_text)

        return response_text

# Single chatbot instance that serves every request coming from the UI
bot = ChatBot()

# Static texts shown by the interface
title = "👋🏻Welcome to Tonic's GaiaMiniMed Chat🚀"
description = "You can use this Space to test out the current model [(Tonic/GaiaMiniMed)](https://huggingface.co/Tonic/GaiaMiniMed) or duplicate this Space and use it locally or on 🤗HuggingFace. [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."

# One example row: a value for the first text input, then one for the second
examples = [
    [
        "What is the proper treatment for buccal herpes?",
        "You are a medicine and public health expert, you will receive a question, answer the question, and provide a complete answer",
    ],
]

# Two text inputs (user input and system prompt) feed bot.predict,
# whose return value is rendered as text.
iface = gr.Interface(
    bot.predict,
    inputs=["text", "text"],
    outputs="text",
    examples=examples,
    title=title,
    description=description,
    theme="ParityError/Anime",
)

# Start serving the interface
iface.launch()