Update app.py
app.py CHANGED
@@ -58,12 +58,20 @@ PRIOR CONCURRENT THERAPY:
 * No prior radiotherapy to \> 30% of the bone marrow or more than standard adjuvant pelvic radiotherapy for rectal cancer <Conditions:>Lung Cancer, Unspecified Adult Solid Tumor, Protocol Specific, <Interventions:>indocyanine green, lidocaine, vinorelbine ditartrate, high performance liquid chromatography, intracellular fluorescence polarization analysis, liquid chromatography, mass spectrometry, pharmacological study <StudyType:>INTERVENTIONAL <PrimaryOutcomes:>Area Under the Curve, Number of Participants With Grade 3 and 4 Toxicities <OverallStatus:>COMPLETED
 """
 
+# Adjust load_model to ensure models are loaded to the correct device on demand
 def load_model(model_name):
     global model, tokenizer
-
-
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # Remove previous model from GPU memory if switching models
+    if 'model' in globals():
+        del model
+        torch.cuda.empty_cache()
+
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -77,22 +85,17 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
 
 @spaces.GPU
 def generate_response(system_instruction, user_input):
-    #
-
-
-    # {"role": "user", "content": user_input},
-    # ]
-    # encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
-    # model_inputs = encodeds.to(device)
-
-    inputs = tokenizer([
-        alpaca_prompt.format(
-            system_instruction, # instruction
-            user_input, # input
-            "", # output - leave this blank for generation!
-        )
-    ], return_tensors = "pt").to("cuda")
+    # Determine the correct device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.to(device)
 
+    # Prepare the input in the appropriate format and move it to the correct device
+    inputs = tokenizer(
+        [alpaca_prompt.format(system_instruction, user_input, "")],
+        return_tensors="pt"
+    ).to(device)
+
+    # Define generation configuration
     meta_config = {
         "do_sample": True,
         "temperature": 0.1,
@@ -101,25 +104,17 @@ def generate_response(system_instruction, user_input):
         "repetition_penalty": 1.2,
         "use_cache": True
     }
-
     generation_config = GenerationConfig(**meta_config)
 
+    # Generate response with error handling for device mismatch issues
     with torch.no_grad():
         outputs = model.generate(**inputs, generation_config=generation_config)
-
-
-    assistant_response = decoded_output.split("### Response:")[-1].strip()
-
-    # tokenizer.batch_decode(outputs)
-    # # Generate model response
-    # with torch.no_grad():
-    #     generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
-    # # Find everything after the <|assistant|> tag
-    # decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    # assistant_response = decoded_output.split("<|assistant|>")[-1].strip()
+    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+    assistant_response = decoded_output.split("### Response:")[-1].strip()
 
     return assistant_response
 
+
 # Gradio interface setup
 with gr.Blocks() as demo:
     gr.Markdown("# Clinical Trial Chatbot with Model Selection")