nafisneehal committed
Commit 15fd1d7 · verified · 1 Parent(s): 13aec60

Update app.py

Files changed (1):
  1. app.py +24 -29
app.py CHANGED
@@ -58,12 +58,20 @@ PRIOR CONCURRENT THERAPY:
 * No prior radiotherapy to \> 30% of the bone marrow or more than standard adjuvant pelvic radiotherapy for rectal cancer <Conditions:>Lung Cancer, Unspecified Adult Solid Tumor, Protocol Specific, <Interventions:>indocyanine green, lidocaine, vinorelbine ditartrate, high performance liquid chromatography, intracellular fluorescence polarization analysis, liquid chromatography, mass spectrometry, pharmacological study <StudyType:>INTERVENTIONAL <PrimaryOutcomes:>Area Under the Curve, Number of Participants With Grade 3 and 4 Toxicities <OverallStatus:>COMPLETED
 """

+# Adjust load_model to ensure models are loaded to the correct device on demand
 def load_model(model_name):
     global model, tokenizer
-    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
-    model.to(device)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # Remove previous model from GPU memory if switching models
+    if 'model' in globals():
+        del model
+        torch.cuda.empty_cache()
+
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
     tokenizer = AutoTokenizer.from_pretrained(model_name)

+
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

 ### Instruction:
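The hunk above rewrites load_model so that any previously loaded checkpoint is released before the next one is brought onto the GPU. Below is a minimal, self-contained sketch of the same reload pattern; it assumes only torch and transformers, uses an explicit None guard instead of the 'model' in globals() check, and is an illustration rather than the exact app.py code.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = None
tokenizer = None

def load_model(model_name):
    """Load a causal LM in fp16, releasing any previously loaded weights first."""
    global model, tokenizer
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Drop the reference to the old weights and return the freed blocks to the
    # CUDA allocator, so two fp16 checkpoints never have to coexist on the GPU
    # while switching models.
    if model is not None:
        model = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16
    ).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_name)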
@@ -77,22 +85,17 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an

 @spaces.GPU
 def generate_response(system_instruction, user_input):
-    # # Format the prompt using the messages structure
-    # messages = [
-    #     {"role": "system", "content": system_instruction},
-    #     {"role": "user", "content": user_input},
-    # ]
-    # encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
-    # model_inputs = encodeds.to(device)
-
-    inputs = tokenizer([
-        alpaca_prompt.format(
-            system_instruction, # instruction
-            user_input, # input
-            "", # output - leave this blank for generation!
-        )
-    ], return_tensors = "pt").to("cuda")
+    # Determine the correct device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.to(device)

+    # Prepare the input in the appropriate format and move it to the correct device
+    inputs = tokenizer(
+        [alpaca_prompt.format(system_instruction, user_input, "")],
+        return_tensors="pt"
+    ).to(device)
+
+    # Define generation configuration
     meta_config = {
         "do_sample": True,
         "temperature": 0.1,
@@ -101,25 +104,17 @@ def generate_response(system_instruction, user_input):
         "repetition_penalty": 1.2,
         "use_cache": True
     }
-
     generation_config = GenerationConfig(**meta_config)

+    # Generate response with error handling for device mismatch issues
     with torch.no_grad():
         outputs = model.generate(**inputs, generation_config=generation_config)
-
-    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-    assistant_response = decoded_output.split("### Response:")[-1].strip()
-
-    # tokenizer.batch_decode(outputs)
-    # # Generate model response
-    # with torch.no_grad():
-    #     generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
-    #     # Find everything after the <|assistant|> tag
-    #     decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    #     assistant_response = decoded_output.split("<|assistant|>")[-1].strip()
+        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+        assistant_response = decoded_output.split("### Response:")[-1].strip()

     return assistant_response

+
 # Gradio interface setup
 with gr.Blocks() as demo:
     gr.Markdown("# Clinical Trial Chatbot with Model Selection")
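The final hunk moves the decoding step next to generation and deletes the leftover chat-template block. Because the decoded text echoes the entire prompt, splitting on "### Response:" keeps only the completion; with do_sample=True at temperature 0.1 the sampling is close to greedy, and repetition_penalty 1.2 discourages loops. A small sketch of that post-processing step, using made-up strings:

# The decoded output repeats the whole prompt followed by the completion, so
# everything after the last "### Response:" marker is the assistant's answer.
decoded_output = (
    "Below is an instruction that describes a task...\n"
    "### Instruction:\nList the primary outcomes of the trial.\n"
    "### Input:\nPRIOR CONCURRENT THERAPY: ...\n"
    "### Response:\nArea Under the Curve; Number of Participants With Grade 3 and 4 Toxicities."
)
assistant_response = decoded_output.split("### Response:")[-1].strip()
print(assistant_response)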
 
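For orientation, here is a hypothetical sketch of how load_model and generate_response might be wired into the Gradio Blocks UI that the final context lines belong to; none of these component names or model IDs appear in the diff, so all of them are assumptions.

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("# Clinical Trial Chatbot with Model Selection")
    model_choice = gr.Dropdown(
        choices=["model-id-a", "model-id-b"],  # placeholder model IDs
        label="Model",
    )
    system_box = gr.Textbox(label="System instruction")
    user_box = gr.Textbox(label="User input")
    output_box = gr.Textbox(label="Response")
    generate_btn = gr.Button("Generate")

    # Reload weights when a different model is selected; generate on click.
    model_choice.change(load_model, inputs=model_choice, outputs=None)
    generate_btn.click(generate_response, inputs=[system_box, user_box], outputs=output_box)

demo.launch()

Because generate_response returns a single string, it maps directly onto one Textbox output.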