Update app.py
app.py CHANGED
@@ -58,12 +58,20 @@ PRIOR CONCURRENT THERAPY:
 * No prior radiotherapy to \> 30% of the bone marrow or more than standard adjuvant pelvic radiotherapy for rectal cancer <Conditions:>Lung Cancer, Unspecified Adult Solid Tumor, Protocol Specific, <Interventions:>indocyanine green, lidocaine, vinorelbine ditartrate, high performance liquid chromatography, intracellular fluorescence polarization analysis, liquid chromatography, mass spectrometry, pharmacological study <StudyType:>INTERVENTIONAL <PrimaryOutcomes:>Area Under the Curve, Number of Participants With Grade 3 and 4 Toxicities <OverallStatus:>COMPLETED
 """
 
+# Adjust load_model to ensure models are loaded to the correct device on demand
 def load_model(model_name):
     global model, tokenizer
-
-
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # Remove previous model from GPU memory if switching models
+    if 'model' in globals():
+        del model
+        torch.cuda.empty_cache()
+
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -77,22 +85,17 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
 
 @spaces.GPU
 def generate_response(system_instruction, user_input):
-    #
-
-
-    # {"role": "user", "content": user_input},
-    # ]
-    # encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
-    # model_inputs = encodeds.to(device)
-
-    inputs = tokenizer([
-        alpaca_prompt.format(
-            system_instruction, # instruction
-            user_input, # input
-            "", # output - leave this blank for generation!
-        )
-    ], return_tensors = "pt").to("cuda")
+    # Determine the correct device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.to(device)
 
+    # Prepare the input in the appropriate format and move it to the correct device
+    inputs = tokenizer(
+        [alpaca_prompt.format(system_instruction, user_input, "")],
+        return_tensors="pt"
+    ).to(device)
+
+    # Define generation configuration
     meta_config = {
         "do_sample": True,
         "temperature": 0.1,
@@ -101,25 +104,17 @@ def generate_response(system_instruction, user_input):
         "repetition_penalty": 1.2,
         "use_cache": True
     }
-
     generation_config = GenerationConfig(**meta_config)
 
+    # Generate response with error handling for device mismatch issues
     with torch.no_grad():
         outputs = model.generate(**inputs, generation_config=generation_config)
-
-
-    assistant_response = decoded_output.split("### Response:")[-1].strip()
-
-    # tokenizer.batch_decode(outputs)
-    # # Generate model response
-    # with torch.no_grad():
-    #     generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
-    # # Find everything after the <|assistant|> tag
-    # decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    # assistant_response = decoded_output.split("<|assistant|>")[-1].strip()
+    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+    assistant_response = decoded_output.split("### Response:")[-1].strip()
 
     return assistant_response
 
+
 # Gradio interface setup
 with gr.Blocks() as demo:
     gr.Markdown("# Clinical Trial Chatbot with Model Selection")